scripts/auxiliar/fixcc.py

   1 #!/usr/bin/env python
   2
   3 # fixcc -- nitpick lily's c++ code
   4
   5 # TODO
#  * maintainable rules: regexps using whitespace (?x) and match names
#    (?P<identifier>)
   8 #  * trailing `*' vs. function definition
   9 #  * do not break/change indentation of fixcc-clean files
  10 #  * check lexer, parser
  11 #  * rewrite in elisp, add to cc-mode
  12 #  * using regexes is broken by design
  13 #  * ?
  14 #  * profit
  15
  16 import __main__
  17 import getopt
  18 import os
  19 import re
  20 import string
  21 import sys
  22 import time
  23
  24 COMMENT = 'COMMENT'
  25 STRING = 'STRING'
  26 GLOBAL_CXX = 'GC++'
  27 CXX = 'C++'
  28 verbose_p = 0
  29 indent_p = 0
  30
  31 rules = {
  32     GLOBAL_CXX:
  33     [
  34     # delete gratuitous block
  35     ('''\n(    |\t)\s*{\n\s*(.*?)(?![{}]|\b(do|for|else|if|switch|while)\b);\n\s*}''',
  36     '\n\\2;'),
  37     ],
  38     CXX:
  39     [
  40     # space before parenthesis open
  41     ('([^\( \]])[ \t]*\(', '\\1 ('),
  42     # space after comma
  43     ("\([^'],\)[ \t]*", '\1 '),
  44     # delete gratuitous block
  45     ('''\n(    |\t)\s*{\n\s*(.*?)(?![{}]|\b(do|for|else|if|switch|while)\b);\n\s*}''',
  46     '\n\\2;'),
  47     # delete inline tabs
  48     ('(\w)\t+', '\\1 '),
  49     # delete inline double spaces
  50     ('   *', ' '),
  51     # delete space after parenthesis open
  52     ('\([ \t]*', '('),
  53     # delete space before parenthesis close
  54     ('[ \t]*\)', ')'),
  55     # delete spaces after prefix
  56     ('(--|\+\+)[ \t]*([\w\)])', '\\1\\2'),
  57     # delete spaces before postfix
  58     ('([\w\)\]])[ \t]*(--|\+\+)', '\\1\\2'),
  59     # delete space after parenthesis close
  60     #('\)[ \t]*([^\w])', ')\\1'),
  61     # delete space around operator
  62     # ('([\w\(\)\]])([ \t]*)(::|\.)([ \t]*)([\w\(\)])', '\\1\\3\\5'),
  63     ('([\w\(\)\]])([ \t]*)(\.|->)([ \t]*)([\w\(\)])', '\\1\\3\\5'),
  64     # delete space after operator
  65     ('(::)([ \t]*)([\w\(\)])', '\\1\\3'),
  66     # delete superflous space around operator
  67     ('([\w\(\)\]])([ \t]+)(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|\+|-|=|/|:|&|\||\*)([ \t]+)([\w\(\)])', '\\1 \\3 \\5'),
  68     # space around operator1
  69     ('([\w\)\]]) *(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|=|/|:|&|\||\*) *([\w\(])', '\\1 \\2 \\3'),
  70     # space around operator2
  71     ('([\w\)\]]) *(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|=|/|:|&|\||\*) ([^\w\s])', '\\1 \\2 \\3'),
  72     # space around operator3
  73     ('([^\w\s]) (&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|[^-]>|=|/|:|&|\||\*) *([\w\(])', '\\1 \\2 \\3'),
  74     # space around operator4
  75     ('([\w\(\)\]]) (\*|/|\+|-) *([-:])', '\\1 \\2 \\3'),
  76     # space around +/-; exponent
  77     ('([\w\)\]])(\+|-)([_A-Za-z\(])', '\\1 \\2 \\3'),
  78     ('([_\dA-Za-df-z\)\]])(\+|-)([\w\(])', '\\1 \\2 \\3'),
  79     # trailing operator
  80     (' (::|&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|\+|-|=|/|:|&XXX|\||\*XXX)[ \t]*\n([ \t]*)',         '\n\\2\\1 '),
  81     # pointer
  82     ##('(bool|char|const|delete|int|stream|unsigned|void|size_t|struct \w+|[A-Z]\w*|,|;|&&|<|[^-]>|\|\||-|\+)[ \t]*(\*|&)[ \t]*', '\\1 \\2'),
  83     ('(bool|char|const|delete|int|stream|unsigned|void|size_t|struct \w+|[A-Z]\w*|,|;|:|=|\?\)|&&|<|[^-]>|\|\||-|\+)[ \t]*(\*|&)[ \t]*', '\\1 \\2'),
  84     #to#('(bool|char|const|delete|int|stream|unsigned|void|([A-Z]\w*)|[,])[ \n\t]*(\*|&)[ \t]*', '\\1 \\3'),
  85     # pointer with template
  86     ('(( *((bool|char|const|delete|int|stream|unsigned|void|size_t|class[ \t]+\w*|[A-Z]\w*|\w+::\w+|[,])[ \*&],*)+)>) *(\*|&) *', '\\1 \\5'),
  87     #to#('(( *((bool|char|delete|int|stream|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*)|[,])[ \*&],*)+)>)[ \t\n]*(\*|&) *', '\\1 \\7'),
  88     # unary pointer, minus, not
  89     ('(return|=) (\*|&|-|!) ([\w\(])', '\\1 \\2\\3'),
  90     # space after `operator'
  91     ('(\Woperator) *([^\w\s])', '\\1 \\2'),
  92     # dangling brace close
  93     ('\n[ \t]*(\n[ \t]*})', '\\1'),
  94     # dangling newline
  95     ('\n[ \t]*\n[ \t]*\n', '\n\n'),
  96     # dangling parenthesis open
  97     #('[ \t]*\n[ \t]*\([ \t]*\n', '('),
  98     ('\([ \t]*\n', '('),
  99     # dangling parenthesis close
 100     ('\n[ \t]*\)', ')'),
 101     # dangling comma
 102     ('\n[ \t]*,', ','),
 103     # dangling semicolon
 104     ('\n[ \t]*;', ';'),
 105     # brace open
 106     ('(\w)[ \t]*([^\s]*){([ \t]*\n)', '\\1\\2\n{\n'),
 107     # brace open backslash
 108     ('(\w[^\n]*){[ \t]*\\\\\n', '\\1\\\n{\\\n'),
 109     # brace close
 110     ("}[ \t]*([^'\n]*\w[^\n\\\]*)\n", '}\n\\1\n'),
 111     # brace close backslash
 112     ("}[ \t]*([^'\n]*\w[^\n\\\]*)", '\n}\n\\1'),
 113     # delete space after `operator'
 114     #('(\Woperator) (\W)', '\\1\\2'),
 115     # delete space after case, label
 116     ('(\W(case|label) ([\w]+)) :', '\\1:'),
 117     # delete space before comma
 118     ('[ \t]*,', ','),
 119     # delete space before semicolon
 120     ('[ \t]*;', ';'),
 121     # delete space before eol-backslash
 122     ('[ \t]*\\\\\n', '\\\n'),
 123     # delete trailing whitespace
 124     ('[ \t]*\n', '\n'),
 125
 126     ## Deuglify code that also gets ugly by rules above.
 127     # delete newline after typedef struct
 128     ('(typedef struct\s+([\w]*\s){([^}]|{[^}]*})*})\s*\n\s*(\w[\w\d]*;)', '\\1 \\4'),
 129     # delete spaces around template brackets
 130     #('(dynamic_cast|template|([A-Z]\w*))[ \t]*<[ \t]*(( *(bool|char|int|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*)),?)+)[ \t]?(| [\*&])[ \t]*>', '\\1<\\3\\8>'),
 131     ('(dynamic_cast|template|typedef|\w+::\w+|[A-Z]\w*)[ \t]*<[ \t]*(( *(bool|char|const|int|unsigned|void|size_t|class[ \t]+\w*|[A-Z]\w*)( *[\*&]?,|[\*&])*)+)[ \t]?(| [\*&])[ \t]*>', '\\1<\\2\\6>'),
 132     ('(\w+::\w+|[A-Z]\w*) < ((\w+::\w+|[A-Z]\w*)<[A-Z]\w*>) >', '\\1<\\2 >'),
 133     ('((if|while)\s+\(([^\)]|\([^\)]*\))*\))\s*;', '\\1\n;'),
 134     ('(for\s+\(([^;]*;[^;]*;([^\)]|\([^\)]*\))*)\))\s*;', '\\1\n;'),
 135     # do {..} while
 136     ('(}\s*while\s*)(\(([^\)]|\([^\)]*\))*\))\s*;', '\\1\\2;'),
 137
 138     ## Fix code that gets broken by rules above.
 139     ##('->\s+\*', '->*'),
 140     # delete space before #define x()
 141     ('#[ \t]*define (\w*)[ \t]*\(', '#define \\1('),
 142     # add space in #define x ()
 143     ('#[ \t]*define (\w*)(\(([^\(\)]|\([^\(\)]*\))*\)\\n)',
 144     '#define \\1 \\2'),
 145     # delete space in #include <>
 146     ('#[ \t]*include[ \t]*<[ \t]*([^ \t>]*)[ \t]*(/?)[ \t]*([^ \t>]*)[ \t]*>',
 147     '#include <\\1\\2\\3>'),
 148     # delete backslash before empty line (emacs' indent region is broken)
 149     ('\\\\\n\n', '\n\n'),
 150     ],
 151
 152     COMMENT:
 153     [
 154     # delete trailing whitespace
 155     ('[ \t]*\n', '\n'),
 156     # delete empty first lines
 157     ('(/\*\n)\n*', '\\1'),
 158     # delete empty last lines
 159     ('\n*(\n\*/)', '\\1'),
 160     ## delete newline after start?
 161     #('/(\*)\n', '\\1'),
 162     ## delete newline before end?
 163     #('\n(\*/)', '\\1'),
 164     ],
 165     }
 166
 167 # Recognize special sequences in the input.
 168 #
 169 #   (?P<name>regex) -- Assign result of REGEX to NAME.
 170 #   *? -- Match non-greedily.
 171 #   (?m) -- Multiline regex: Make ^ and $ match at each line.
 172 #   (?s) -- Make the dot match all characters including newline.
 173 #   (?x) -- Ignore whitespace in patterns.
 174 no_match = 'a\ba'
 175 snippet_res = {
 176     CXX: {
 177     'multiline_comment':
 178     r'''(?sx)
 179     (?P<match>
 180     (?P<code>
 181     [ \t]*/\*.*?\*/))''',
 182
 183     'singleline_comment':
 184     r'''(?mx)
 185     ^.*
 186     (?P<match>
 187     (?P<code>
 188     [ \t]*//([ \t][^\n]*|)\n))''',
 189
 190     'string':
 191     r'''(?x)
 192     (?P<match>
 193     (?P<code>
 194     "([^\"\n](\")*)*"))''',
 195
 196     'char':
 197     r'''(?x)
 198     (?P<match>
 199     (?P<code>
 200     '([^']+|\')))''',
 201
 202      'include':
 203      r'''(?x)
 204      (?P<match>
 205      (?P<code>
 206      "#[ \t]*include[ \t]*<[^>]*>''',
 207      },
 208      }
 209
 210 class Chunk:
 211     def replacement_text (self):
 212         return ''
 213
 214     def filter_text (self):
 215         return self.replacement_text ()
 216
 217 class Substring (Chunk):
 218     def __init__ (self, source, start, end):
 219         self.source = source
 220         self.start = start
 221         self.end = end
 222
 223     def replacement_text (self):
 224         s = self.source[self.start:self.end]
 225         if verbose_p:
 226             sys.stderr.write ('CXX Rules')
 227         for i in rules[CXX]:
 228             if verbose_p:
 229                 sys.stderr.write ('.')
 230                 #sys.stderr.write ('\n\n***********\n')
 231                 #sys.stderr.write (i[0])
 232                 #sys.stderr.write ('\n***********\n')
 233                 #sys.stderr.write ('\n=========>>\n')
 234                 #sys.stderr.write (s)
 235                 #sys.stderr.write ('\n<<=========\n')
 236             s = re.sub (i[0], i[1], s)
 237         if verbose_p:
 238             sys.stderr.write ('done\n')
 239         return s
 240
 241
 242 class Snippet (Chunk):
 243     def __init__ (self, type, match, format):
 244         self.type = type
 245         self.match = match
 246         self.hash = 0
 247         self.options = []
 248         self.format = format
 249
 250     def replacement_text (self):
 251         return self.match.group ('match')
 252
 253     def substring (self, s):
 254         return self.match.group (s)
 255
 256     def __repr__ (self):
 257         return `self.__class__` + ' type = ' + self.type
 258
 259 class Multiline_comment (Snippet):
 260     def __init__ (self, source, match, format):
 261         self.type = type
 262         self.match = match
 263         self.hash = 0
 264         self.options = []
 265         self.format = format
 266
 267     def replacement_text (self):
 268         s = self.match.group ('match')
 269         if verbose_p:
 270             sys.stderr.write ('COMMENT Rules')
 271         for i in rules[COMMENT]:
 272             if verbose_p:
 273                 sys.stderr.write ('.')
 274             s = re.sub (i[0], i[1], s)
 275         return s
 276
# Snippet types that are wrapped in a class of their own;
# find_toplevel_snippets uses plain Snippet for every type not listed here.
snippet_type_to_class = {
    'multiline_comment': Multiline_comment,
#        'string': Multiline_comment,
#        'include': Include_snippet,
}
 282
 283 def find_toplevel_snippets (s, types):
 284     if verbose_p:
 285         sys.stderr.write ('Dissecting')
 286
 287     res = {}
 288     for i in types:
 289         res[i] = re.compile (snippet_res[format][i])
 290
 291     snippets = []
 292     index = 0
 293     ## found = dict (map (lambda x: (x, None),
 294     ##                      types))
 295     ## urg python2.1
 296     found = {}
 297     map (lambda x, f = found: f.setdefault (x, None),
 298       types)
 299
 300     # We want to search for multiple regexes, without searching
 301     # the string multiple times for one regex.
 302     # Hence, we use earlier results to limit the string portion
 303     # where we search.
 304     # Since every part of the string is traversed at most once for
 305     # every type of snippet, this is linear.
 306
 307     while 1:
 308         if verbose_p:
 309             sys.stderr.write ('.')
 310         first = None
 311         endex = 1 << 30
 312         for type in types:
 313             if not found[type] or found[type][0] < index:
 314                 found[type] = None
 315                 m = res[type].search (s[index:endex])
 316                 if not m:
 317                     continue
 318
 319                 cl = Snippet
 320                 if snippet_type_to_class.has_key (type):
 321                     cl = snippet_type_to_class[type]
 322                 snip = cl (type, m, format)
 323                 start = index + m.start ('match')
 324                 found[type] = (start, snip)
 325
 326             if found[type] \
 327              and (not first \
 328                 or found[type][0] < found[first][0]):
 329                 first = type
 330
 331                 # FIXME.
 332
 333                 # Limiting the search space is a cute
 334                 # idea, but this *requires* to search
 335                 # for possible containing blocks
 336                 # first, at least as long as we do not
 337                 # search for the start of blocks, but
 338                 # always/directly for the entire
 339                 # @block ... @end block.
 340
 341                 endex = found[first][0]
 342
 343         if not first:
 344             snippets.append (Substring (s, index, len (s)))
 345             break
 346
 347         (start, snip) = found[first]
 348         snippets.append (Substring (s, index, start))
 349         snippets.append (snip)
 350         found[first] = None
 351         index = start + len (snip.match.group ('match'))
 352
 353     return snippets
 354
 355 def nitpick_file (outdir, file):
 356     s = open (file).read ()
 357
 358     for i in rules[GLOBAL_CXX]:
 359         s = re.sub (i[0], i[1], s)
 360
 361     # FIXME: Containing blocks must be first, see
 362     #        find_toplevel_snippets.
 363     #        We leave simple strings be part of the code
 364     snippet_types = (
 365         'multiline_comment',
 366         'singleline_comment',
 367         'string',
 368 #                'char',
 369         )
 370
 371     chunks = find_toplevel_snippets (s, snippet_types)
 372     #code = filter (lambda x: is_derived_class (x.__class__, Substring),
 373     #               chunks)
 374
 375     t = string.join (map (lambda x: x.filter_text (), chunks), '')
 376     fixt = file
 377     if s != t:
 378         if not outdir:
 379             os.system ('mv %s %s~' % (file, file))
 380         else:
 381             fixt = os.path.join (outdir,
 382                       os.path.basename (file))
 383         h = open (fixt, "w")
 384         h.write (t)
 385         h.close ()
 386     if s != t or indent_p:
 387         indent_file (fixt)
 388
 389 def indent_file (file):
 390     emacs = '''emacs\
 391     --no-window-system\
 392     --batch\
 393     --no-site-file\
 394     --no-init-file\
 395     %(file)s\
 396     --eval '(let ((error nil)
 397            (version-control nil))
 398         (load-library "cc-mode")
 399         (c++-mode)
 400         (indent-region (point-min) (point-max))
 401         (if (buffer-modified-p (current-buffer))
 402          (save-buffer)))' ''' % vars ()
 403     emacsclient = '''emacsclient\
 404     --socket-name=%(socketdir)s/%(socketname)s\
 405     --no-wait\
 406     --eval '(let ((error nil)
 407            (version-control nil))
 408         (load-library "cc-mode")
 409         (find-file "%(file)s")
 410         (c++-mode)
 411         (indent-region (point-min) (point-max))
 412         (if (buffer-modified-p (current-buffer))
 413          (save-buffer)))' ''' \
 414          % { 'file': file,
 415            'socketdir' : socketdir,
 416            'socketname' : socketname, }
 417     if verbose_p:
 418         sys.stderr.write (emacs)
 419         sys.stderr.write ('\n')
 420     os.system (emacs)
 421
 422
 423 def usage ():
 424     sys.stdout.write (r'''
 425 Usage:
 426 fixcc [OPTION]... FILE...
 427
 428 Options:
 429  --help
 430  --indent   reindent, even if no changes
 431  --verbose
 432  --test
 433
 434 Typical use with LilyPond:
 435
 436  fixcc $(find flower kpath-guile lily -name '*cc' -o -name '*hh' | grep -v /out)
 437
 438 This script is licensed under the GNU GPL
 439 ''')
 440
 441 def do_options ():
 442     global indent_p, outdir, verbose_p
 443     (options, files) = getopt.getopt (sys.argv[1:], '',
 444                      ['help', 'indent', 'outdir=',
 445                      'test', 'verbose'])
 446     for (o, a) in options:
 447         if o == '--help':
 448             usage ()
 449             sys.exit (0)
 450         elif o == '--indent':
 451             indent_p = 1
 452         elif o == '--outdir':
 453             outdir = a
 454         elif o == '--verbose':
 455             verbose_p = 1
 456         elif o == '--test':
 457             test ()
 458             sys.exit (0)
 459         else:
 460             assert unimplemented
 461     if not files:
 462         usage ()
 463         sys.exit (2)
 464     return files
 465
 466
# Directory for the fixed files, set by --outdir; while this is false,
# nitpick_file writes the result under the original file name.
outdir = 0
# Selects the regexes in snippet_res.
format = CXX
# Directory and name of the emacs server socket, used by setup_client and
# by the emacsclient command in indent_file.
socketdir = '/tmp/fixcc'
socketname = 'fixcc%d' % os.getpid ()
 471
 472 def setup_client ():
 473     #--no-window-system\
 474     #--batch\
 475     os.unlink (os.path.join (socketdir, socketname))
 476     os.mkdir (socketdir, 0700)
 477     emacs='''emacs\
 478         --no-site-file\
 479         --no-init-file\
 480         --eval '(let ((error nil)
 481                (version-control nil))
 482             (load-library "server")
 483             (setq server-socket-dir "%(socketdir)s")
 484             (setq server-name "%(socketname)s")
 485             (server-start)
 486             (while t) (sleep 1000))' ''' \
 487             % { 'socketdir' : socketdir,
 488               'socketname' : socketname, }
 489
 490     if not os.fork ():
 491         os.system (emacs)
 492         sys.exit (0)
 493     while not os.path.exists (os.path.join (socketdir, socketname)):
 494         time.sleep (1)
 495
 496 def main ():
 497     #emacsclient should be faster, but this does not work yet
 498     #setup_client ()
 499     files = do_options ()
 500     if outdir and not os.path.isdir (outdir):
 501         os.makedirs (outdir)
 502     for i in files:
 503         sys.stderr.write ('%s...\n' % i)
 504         nitpick_file (outdir, i)
 505
 506
# Deliberately badly formatted C++-like text; test () writes it to a file,
# runs nitpick_file on that file and prints the file afterwards.
## TODO: make this compilable and check with g++
TEST = '''
#include <libio.h>
#include <map>
class
ostream ;

class Foo {
public: static char* foo ();
std::map<char*,int>* bar (char, char) { return 0; }
};
typedef struct
{
 Foo **bar;
} String;

ostream &
operator << (ostream & os, String d);

typedef struct _t_ligature
{
 char *succ, *lig;
 struct _t_ligature * next;
}  AFM_Ligature;

typedef std::map < AFM_Ligature const *, int > Bar;

 /**
 (c) 1997--2009 Han-Wen Nienhuys <hanwen@cs.uu.nl>
 */

/*      ||
*      vv
* !OK  OK
*/
/*     ||
   vv
 !OK  OK
*/
char *
Foo:: foo ()
{
int
i
;
 char* a= &++ i ;
 a [*++ a] = (char*) foe (*i, &bar) *
 2;
 int operator double ();
 std::map<char*,int> y =*bar(-*a ,*b);
 Interval_t<T> & operator*= (T r);
 Foo<T>*c;
 int compare (Pqueue_ent < K, T > const& e1, Pqueue_ent < K,T> *e2);
 delete *p;
 if (abs (f)*2 > abs (d) *FUDGE)
  ;
 while (0);
 for (; i<x foo(); foo>bar);
 for (; *p && > y;
   foo > bar)
;
 do {
 ;;;
 }
 while (foe);

 squiggle. extent;
 1 && * unsmob_moment (lf);
 line_spanner_ = make_spanner ("DynamicLineSpanner", rq ? rq->*self_scm
(): SCM_EOL);
 case foo: k;

 if (0) {a=b;} else {
 c=d;
 }

 cookie_io_functions_t Memory_out_stream::functions_ = {
  Memory_out_stream::reader,
  ...
 };

 int compare (Array < Pitch> *, Array < Pitch> *);
 original_ = (Grob *) & s;
 Drul_array< Link_array<Grob> > o;
}

 header_.char_info_pos = (6 + header_length) * 4;
 return ly_bool2scm (*ma < * mb);

 1 *::sign(2);

 (shift) *-d;

 a = 0 ? *x : *y;

a = "foo() 2,2,4";
{
 if (!span_)
  {
   span_ = make_spanner ("StaffSymbol", SCM_EOL);
  }
}
{
 if (!span_)
  {
   span_ = make_spanner (StaffSymbol, SCM_EOL);
  }
}
'''
 616
 617 def test ():
 618     test_file = 'fixcc.cc'
 619     open (test_file, 'w').write (TEST)
 620     nitpick_file (outdir, test_file)
 621     sys.stdout.write (open (test_file).read ())
 622
# Run main only when executed as a script, not when imported.
if __name__ == '__main__':
    main ()
 625