buildscripts/check_texi_refs.py

   1 #!/usr/bin/env python
   2
   3 """
   4 check_texi_refs.py
   5 Interactive Texinfo cross-references checking and fixing tool
   6
   7 """
   8
   9
  10 import sys
  11 import re
  12 import os
  13 import optparse
  14 import imp
  15
# Name of the build output subdirectory; find_file also searches it, and
# check_ref treats any file whose path contains it as generated.
outdir = 'out-www'

# Progress and diagnostic messages are written to `log', reports and
# prompts to `stdout'.
log = sys.stderr
stdout = sys.stdout

# strerror value carried by the EnvironmentError that find_file raises;
# callers compare against it to recognize a failed lookup.
file_not_found = 'file not found in include path'

# Written in interactive mode when a broken x-ref is left without a fix.
warn_not_fixed = '*** Warning: this broken x-ref has not been fixed!\n'
  24
  25 opt_parser = optparse.OptionParser (usage='check_texi_refs.py [OPTION]... FILE',
  26                                     description='''Check and fix \
  27 cross-references in a collection of Texinfo
  28 documents heavily cross-referenced each other.
  29 ''')
  30
  31 opt_parser.add_option ('-a', '--auto-fix',
  32                        help="Automatically fix cross-references whenever \
  33 it is possible",
  34                        action='store_true',
  35                        dest='auto_fix',
  36                        default=False)
  37
  38 opt_parser.add_option ('-b', '--batch',
  39                        help="Do not run interactively",
  40                        action='store_false',
  41                        dest='interactive',
  42                        default=True)
  43
  44 opt_parser.add_option ('-c', '--check-comments',
  45                        help="Also check commented out x-refs",
  46                        action='store_true',
  47                        dest='check_comments',
  48                        default=False)
  49
  50 opt_parser.add_option ('-p', '--check-punctuation',
  51                        help="Check punctuation after x-refs",
  52                        action='store_true',
  53                        dest='check_punctuation',
  54                        default=False)
  55
  56 opt_parser.add_option ("-I", '--include', help="add DIR to include path",
  57                        metavar="DIR",
  58                        action='append', dest='include_path',
  59                        default=[os.path.abspath (os.getcwd ())])
  60
  61 (options, files) = opt_parser.parse_args ()
  62
  63 class InteractionError (Exception):
  64     pass
  65
  66
  67 manuals_defs = imp.load_source ('manuals_defs', files[0])
  68 manuals = {}
  69
  70 def find_file (name, prior_directory='.'):
  71     p = os.path.join (prior_directory, name)
  72     out_p = os.path.join (prior_directory, outdir, name)
  73     if os.path.isfile (p):
  74         return p
  75     elif os.path.isfile (out_p):
  76         return out_p
  77
  78     # looking for file in include_path
  79     for d in options.include_path:
  80         p = os.path.join (d, name)
  81         if os.path.isfile (p):
  82             return p
  83
  84     # file not found in include_path: looking in `outdir' subdirs
  85     for d in options.include_path:
  86         p = os.path.join (d, outdir, name)
  87         if os.path.isfile (p):
  88             return p
  89
  90     raise EnvironmentError (1, file_not_found, name)
  91
  92
  93 exit_code = 0
  94
  95 def set_exit_code (n):
  96     global exit_code
  97     exit_code = max (exit_code, n)
  98
  99
 100 if options.interactive:
 101     try:
 102         import readline
 103     except:
 104         pass
 105
 106     def yes_prompt (question, default=False, retries=3):
 107         d = {True: 'y', False: 'n'}.get (default, False)
 108         while retries:
 109             a = raw_input ('%s [default: %s]' % (question, d) + '\n')
 110             if a.lower ().startswith ('y'):
 111                 return True
 112             if a.lower ().startswith ('n'):
 113                 return False
 114             if a == '' or retries < 0:
 115                 return default
 116             stdout.write ("Please answer yes or no.\n")
 117             retries -= 1
 118
 119     def search_prompt ():
 120         """Prompt user for a substring to look for in node names.
 121
 122 If user input is empty or matches no node name, return None,
 123 otherwise return a list of (manual, node name, file) tuples.
 124
 125 """
 126         substring = raw_input ("Enter a substring to search in node names \
 127 (press Enter to skip this x-ref):\n")
 128         if not substring:
 129             return None
 130         substring = substring.lower ()
 131         matches = []
 132         for k in manuals:
 133             matches += [(k, node, manuals[k]['nodes'][node][0])
 134                         for node in manuals[k]['nodes']
 135                         if substring in node.lower ()]
 136         return matches
 137
 138 else:
 139     def yes_prompt (question, default=False, retries=3):
 140         return default
 141
 142     def search_prompt ():
 143         return None
 144
 145
# Matches one cross-reference macro call.  Group 1 is the macro name,
# group 2 the node name (no comma or backslash), group 3 the single
# character after the closing brace; DOTALL lets that character be a
# newline.
ref_re = re.compile (r'@(ref|ruser|rlearning|rprogram|rglos)\{([^,\\]*?)\}(.)',
                     re.DOTALL)
# Matches `@node' and `@include' lines.  Group 1 is the command name,
# group 2 the rest of the line.
node_include_re = re.compile (r'(?m)^@(node|include)\s+(.+?)$')

# Any run of whitespace.
whitespace_re = re.compile (r'\s+')
# Zero-width match at the beginning of every line.
line_start_re = re.compile ('(?m)^')
 152
 153 def which_line (index, newline_indices):
 154     """Calculate line number of a given string index
 155
 156 Return line number of string index index, where
 157 newline_indices is an ordered iterable of all newline indices.
 158 """
 159     inf = 0
 160     sup = len (newline_indices) - 1
 161     n = len (newline_indices)
 162     while inf + 1 != sup:
 163         m = (inf + sup) / 2
 164         if index >= newline_indices [m]:
 165             inf = m
 166         else:
 167             sup = m
 168     return inf + 1
 169
 170
 171 comments_re = re.compile ('(?<!@)(@c(?:omment)? \
 172 .*?\\n|^@ignore\\n.*?\\n@end ignore\\n)', re.M | re.S)
 173
 174 def calc_comments_boundaries (texinfo_doc):
 175     return [(m.start (), m.end ()) for m in comments_re.finditer (texinfo_doc)]
 176
 177
 178 def is_commented_out (start, end, comments_boundaries):
 179     for k in range (len (comments_boundaries)):
 180         if (start > comments_boundaries[k][0]
 181             and end <= comments_boundaries[k][1]):
 182             return True
 183         elif end <= comments_boundaries[k][0]:
 184             return False
 185     return False
 186
 187
 188 def read_file (f, d):
 189     s = open (f).read ()
 190     base = os.path.basename (f)
 191     dir = os.path.dirname (f)
 192
 193     d['contents'][f] = s
 194
 195     d['newline_indices'][f] = [m.end () for m in line_start_re.finditer (s)]
 196     if options.check_comments:
 197         d['comments_boundaries'][f] = []
 198     else:
 199         d['comments_boundaries'][f] = calc_comments_boundaries (s)
 200
 201     for m in node_include_re.finditer (s):
 202         if m.group (1) == 'node':
 203             line = which_line (m.start (), d['newline_indices'][f])
 204             d['nodes'][m.group (2)] = (f, line)
 205
 206         elif m.group (1) == 'include':
 207             try:
 208                 p = find_file (m.group (2), dir)
 209             except EnvironmentError, (errno, strerror):
 210                 if strerror == file_not_found:
 211                     continue
 212                 else:
 213                     raise
 214             read_file (p, d)
 215
 216
 217 def read_manual (name):
 218     """Look for all node names and cross-references in a Texinfo document
 219
 220 Return a (manual, dictionary) tuple where manual is the cross-reference
 221 macro name defined by references_dict[name], and dictionary
 222 has the following keys:
 223
 224   'nodes' is a dictionary of `node name':(file name, line number),
 225
 226   'contents' is a dictionary of file:`full file contents',
 227
 228   'newline_indices' is a dictionary of
 229 file:[list of beginning-of-line string indices],
 230
 231   'comments_boundaries' is a list of (start, end) tuples,
 232 which contain string indices of start and end of each comment.
 233
 234 Included files that can be found in the include path are processed too.
 235
 236 """
 237     d = {}
 238     d['nodes'] = {}
 239     d['contents'] = {}
 240     d['newline_indices'] = {}
 241     d['comments_boundaries'] = {}
 242     manual = manuals_defs.references_dict.get (name, '')
 243     try:
 244         f = find_file (name + '.tely')
 245     except EnvironmentError, (errno, strerror):
 246         if not strerror == file_not_found:
 247             raise
 248         else:
 249             try:
 250                 f = find_file (name + '.texi')
 251             except EnvironmentError, (errno, strerror):
 252                 if strerror == file_not_found:
 253                     sys.stderr.write (name + '.{texi,tely}: ' +
 254                                       file_not_found + '\n')
 255                     return (manual, d)
 256                 else:
 257                     raise
 258
 259     log.write ("Processing manual %s (%s)\n" % (f, manual))
 260     read_file (f, d)
 261     return (manual, d)
 262
 263
 264 log.write ("Reading files...\n")
 265
 266 manuals = dict ([read_manual (name)
 267                  for name in manuals_defs.references_dict.keys ()])
 268
 269 ref_fixes = set ()
 270 bad_refs_count = 0
 271 fixes_count = 0
 272
 273 def add_fix (old_type, old_ref, new_type, new_ref):
 274     ref_fixes.add ((old_type, old_ref, new_type, new_ref))
 275
 276
 277 def lookup_fix (r):
 278     found = []
 279     for (old_type, old_ref, new_type, new_ref) in ref_fixes:
 280         if r == old_ref:
 281             found.append ((new_type, new_ref))
 282     return found
 283
 284
 285 def preserve_linebreak (text, linebroken):
 286     if linebroken:
 287         if ' ' in text:
 288             text = text.replace (' ', '\n', 1)
 289             n = ''
 290         else:
 291             n = '\n'
 292     else:
 293         n = ''
 294     return (text, n)
 295
 296
 297 def choose_in_numbered_list (message, string_list, sep=' ', retries=3):
 298     S = set (string_list)
 299     S.discard ('')
 300     string_list = list (S)
 301     numbered_list = sep.join ([str (j + 1) + '. ' + string_list[j]
 302                                for j in range (len (string_list))]) + '\n'
 303     t = retries
 304     while t > 0:
 305         value = ''
 306         stdout.write (message +
 307                       "(press Enter to discard and start a new search)\n")
 308         input = raw_input (numbered_list)
 309         if not input:
 310             return ''
 311         try:
 312             value = string_list[int (input) - 1]
 313         except IndexError:
 314             stdout.write ("Error: index number out of range\n")
 315         except ValueError:
 316             matches = [input in v for v in string_list]
 317             n = matches.count (True)
 318             if n == 0:
 319                 stdout.write ("Error: input matches no item in the list\n")
 320             elif n > 1:
 321                 stdout.write ("Error: ambiguous input (matches several items \
 322 in the list)\n")
 323             else:
 324                 value = string_list[matches.index (True)]
 325         if value:
 326             return value
 327         t -= 1
 328     raise InteractionError ("%d retries limit exceeded" % retries)
 329
 330
 331 def check_ref (manual, file, m):
 332     global fixes_count, bad_refs_count
 333     bad_ref = False
 334     fixed = True
 335     type = m.group (1)
 336     original_name = m.group (2)
 337     name = whitespace_re.sub (' ', original_name). strip ()
 338     newline_indices = manuals[manual]['newline_indices'][file]
 339     line = which_line (m.start (), newline_indices)
 340     linebroken = '\n' in m.group (2)
 341     next_char = m.group (3)
 342     commented_out = is_commented_out \
 343         (m.start (), m.end (), manuals[manual]['comments_boundaries'][file])
 344     useful_fix = not outdir in file
 345
 346     # check puncuation after x-ref
 347     if options.check_punctuation and not next_char in '.,;:!?':
 348         stdout.write ("Warning: %s: %d: `%s': x-ref \
 349 not followed by punctuation\n" % (file, line, name))
 350
 351     # validate xref
 352     explicit_type = type
 353     new_name = name
 354
 355     if type != 'ref' and type == manual and not commented_out and useful_fix:
 356         bad_ref = True
 357         stdout.write ("\n%s: %d: `%s': external %s x-ref should be internal\n"
 358                       % (file, line, name, type))
 359         if options.auto_fix or yes_prompt ("Fix this?"):
 360             type = 'ref'
 361
 362     if type == 'ref':
 363         explicit_type = manual
 364
 365     if not name in manuals[explicit_type]['nodes'] and not commented_out:
 366         bad_ref = True
 367         fixed = False
 368         stdout.write ('\n')
 369         if type == 'ref':
 370             stdout.write ("%s: %d: `%s': wrong internal x-ref\n"
 371                           % (file, line, name))
 372         else:
 373             stdout.write ("%s: %d: `%s': wrong external `%s' x-ref\n"
 374                           % (file, line, name, type))
 375         # print context
 376         stdout.write ('--\n' + manuals[manual]['contents'][file]
 377                       [newline_indices[max (0, line - 2)]:
 378                        newline_indices[min (line + 3,
 379                                             len (newline_indices) - 1)]] +
 380                       '--\n')
 381
 382         # try to find the reference in other manuals
 383         found = []
 384         for k in [k for k in manuals if k != explicit_type]:
 385             if name in manuals[k]['nodes']:
 386                 if k == manual:
 387                     found = ['ref']
 388                     stdout.write ("  found as internal x-ref\n")
 389                     break
 390                 else:
 391                     found.append (k)
 392                     stdout.write ("  found as `%s' x-ref\n" % k)
 393
 394         if (len (found) == 1
 395             and (options.auto_fix or yes_prompt ("Fix this x-ref?"))):
 396             add_fix (type, name, found[0], name)
 397             type = found[0]
 398             fixed = True
 399
 400         elif len (found) > 1 and useful_fix:
 401             if options.interactive or options.auto_fix:
 402                 stdout.write ("* Several manuals contain this node name, \
 403 cannot determine manual automatically.\n")
 404             if options.interactive:
 405                 t = choose_in_numbered_list ("Choose manual for this x-ref by \
 406 index number or beginning of name:\n", found)
 407                 if t:
 408                     add_fix (type, name, t, name)
 409                     type = t
 410                     fixed = True
 411
 412         if not fixed:
 413             # try to find a fix already made
 414             found = lookup_fix (name)
 415
 416             if len (found) == 1:
 417                 stdout.write ("Found one previous fix: %s `%s'\n" % found[0])
 418                 if options.auto_fix or yes_prompt ("Apply this fix?"):
 419                     type, new_name = found[0]
 420                     fixed = True
 421
 422             elif len (found) > 1:
 423                 if options.interactive or options.auto_fix:
 424                     stdout.write ("* Several previous fixes match \
 425 this node name, cannot fix automatically.\n")
 426                 if options.interactive:
 427                     concatened = choose_in_numbered_list ("Choose new manual \
 428 and x-ref by index number or beginning of name:\n", [''.join ([i[0], ' ', i[1]])
 429                                                      for i in found],
 430                                                     sep='\n')
 431                     if concatened:
 432                         type, new_name = concatenated.split (' ', 1)
 433                         fixed = True
 434
 435         if not fixed:
 436             # all previous automatic fixes attempts failed,
 437             # ask user for substring to look in node names
 438             while True:
 439                 node_list = search_prompt ()
 440                 if node_list == None:
 441                     if options.interactive:
 442                         stdout.write (warn_not_fixed)
 443                     break
 444                 elif not node_list:
 445                     stdout.write ("No matched node names.\n")
 446                 else:
 447                     concatenated = choose_in_numbered_list ("Choose \
 448 node name and manual for this x-ref by index number or beginning of name:\n", \
 449                             [' '.join ([i[0], i[1], '(in %s)' % i[2]])
 450                              for i in node_list],
 451                                                             sep='\n')
 452                     if concatenated:
 453                         t, z = concatenated.split (' ', 1)
 454                         new_name = z.split (' (in ', 1)[0]
 455                         add_fix (type, name, t, new_name)
 456                         type = t
 457                         fixed = True
 458                         break
 459
 460     if fixed and type == manual:
 461         type = 'ref'
 462     bad_refs_count += int (bad_ref)
 463     if bad_ref and not useful_fix:
 464         stdout.write ("*** Warning: this file is automatically generated, \
 465 please fix the code source instead of generated documentation.\n")
 466
 467     # compute returned string
 468     if new_name == name:
 469         return ('@%s{%s}' % (type, original_name)) + next_char
 470     else:
 471         fixes_count += 1
 472         (ref, n) = preserve_linebreak (new_name, linebroken)
 473         return ('@%s{%s}' % (type, ref)) + next_char + n
 474
 475
 476 log.write ("Checking cross-references...\n")
 477
 478 try:
 479     for key in manuals:
 480         for file in manuals[key]['contents']:
 481             s = ref_re.sub (lambda m: check_ref (key, file, m),
 482                             manuals[key]['contents'][file])
 483             if s != manuals[key]['contents'][file]:
 484                 open (file, 'w').write (s)
 485 except KeyboardInterrupt:
 486     log.write ("Operation interrupted, exiting.\n")
 487     sys.exit (2)
 488 except InteractionError, instance:
 489     log.write ("Operation refused by user: %s\nExiting.\n" % instance)
 490     sys.exit (3)
 491
 492 log.write ("Done: %d bad x-refs found, fixed %d.\n" %
 493            (bad_refs_count, fixes_count))