Fix some C++ nitpicking rules in fixcc.py.
[lilypond/mpolesky.git] / scripts / auxiliar / fixcc.py
blob3928c60775b753747249b3f4a65fa203a5d3c01a
1 #!/usr/bin/env python
3 # fixcc -- nitpick lily's c++ code
5 # This file is part of LilyPond, the GNU music typesetter.
7 # LilyPond is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
12 # LilyPond is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with LilyPond. If not, see <http://www.gnu.org/licenses/>.
20 # TODO
21 # * maintainable rules: write the regexps with whitespace (?x) and named
22 # groups (?P<identifier>)
23 # * trailing `*' vs. function definition
24 # * do not break/change indentation of fixcc-clean files
25 # * check lexer, parser
26 # * rewrite in elisp, add to cc-mode
27 # * using regexes is broken by design
28 # * ?
29 # * profit
31 import __main__
32 import getopt
33 import os
34 import re
35 import string
36 import sys
37 import time
# Keys into the `rules' table below; CXX is also the default snippet format.
COMMENT = 'COMMENT'
STRING = 'STRING'
GLOBAL_CXX = 'GC++'
CXX = 'C++'
# Command-line flags, set by do_options ().
verbose_p = 0
indent_p = 0

# Rewrite rules, keyed by the kind of text they apply to.  Each value is an
# ordered list of (regex, replacement) pairs that are handed to re.sub () one
# after another, so every rule sees the output of the rules before it:
#   GLOBAL_CXX -- applied to the whole file by nitpick_file ()
#   CXX        -- applied to code outside comments/strings (Substring)
#   COMMENT    -- applied to multiline comments (Multiline_comment)
rules = {
    GLOBAL_CXX:
    [
    # delete gratuitous block
    # NOTE(review): the first group is a single space here; confirm that no
    # wider indent was intended.
    ('''\n( |\t)\s*{\n\s*(.*?)(?![{}]|\b(do|for|else|if|switch|while)\b);\n\s*}''',
     '\n\\2;'),
    ],
    CXX:
    [
    # space before parenthesis open
    ('([^\( \]])[ \t]*\(', '\\1 ('),
    # space after comma
    # NOTE(review): in a Python regex `\(' and `\)' are literal parentheses
    # and the replacement '\1' is the character 0x01, not a back-reference,
    # so as written this rule only touches text like `(x,)'.  Presumably
    # "([^'],)[ \t]*" with '\\1 ' was meant; left unchanged until confirmed,
    # because enabling it changes the output for every comma.
    ("\([^'],\)[ \t]*", '\1 '),
    # delete gratuitous block
    ('''\n( |\t)\s*{\n\s*(.*?)(?![{}]|\b(do|for|else|if|switch|while)\b);\n\s*}''',
     '\n\\2;'),
    # delete inline tabs
    ('(\w)\t+', '\\1 '),
    # delete inline double spaces
    # (the run length is spelled out: a pattern that can match the empty
    # string would make re.sub () insert a space between all characters)
    (' {2,}', ' '),
    # delete space after parenthesis open
    ('\([ \t]*', '('),
    # delete space before parenthesis close
    ('[ \t]*\)', ')'),
    # delete spaces after prefix
    ('(--|\+\+)[ \t]*([\w\)])', '\\1\\2'),
    # delete spaces before postfix
    ('([\w\)\]])[ \t]*(--|\+\+)', '\\1\\2'),
    # delete space after parenthesis close
    #('\)[ \t]*([^\w])', ')\\1'),
    # delete space around operator
    # ('([\w\(\)\]])([ \t]*)(::|\.)([ \t]*)([\w\(\)])', '\\1\\3\\5'),
    ('([\w\(\)\]])([ \t]*)(\.|->)([ \t]*)([\w\(\)])', '\\1\\3\\5'),
    # delete space after operator
    ('(::)([ \t]*)([\w\(\)])', '\\1\\3'),
    # delete superfluous space around operator
    ('([\w\(\)\]])([ \t]+)(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|\+|-|=|/|:|&|\||\*)([ \t]+)([\w\(\)])', '\\1 \\3 \\5'),
    # space around operator1
    ('([\w\)\]]) *(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|=|/|:|&|\||\*) *([\w\(])', '\\1 \\2 \\3'),
    # space around operator2
    ('([\w\)\]]) *(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|=|/|:|&|\||\*) ([^\w\s])', '\\1 \\2 \\3'),
    # space around operator3
    ('([^\w\s]) (&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|[^-]>|=|/|:|&|\||\*) *([\w\(])', '\\1 \\2 \\3'),
    # space around operator4
    ('([\w\(\)\]]) (\*|/|\+|-) *([-:])', '\\1 \\2 \\3'),
    # space around +/-; exponent
    ('([\w\)\]])(\+|-)([_A-Za-z\(])', '\\1 \\2 \\3'),
    ('([_\dA-Za-df-z\)\]])(\+|-)([\w\(])', '\\1 \\2 \\3'),
    # trailing operator
    (' (::|&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|\+|-|=|/|:|&XXX|\||\*XXX)[ \t]*\n([ \t]*)', '\n\\2\\1 '),
    # pointer
    ##('(bool|char|const|delete|int|stream|unsigned|void|size_t|struct \w+|[A-Z]\w*|,|;|&&|<|[^-]>|\|\||-|\+)[ \t]*(\*|&)[ \t]*', '\\1 \\2'),
    ('(bool|char|const|delete|int|stream|unsigned|void|vsize|size_t|struct \w+|[A-Z]\w*|,|;|:|=|\?\)|&&|<|[^-]>|\|\||-|\+)[ \t]*(\*|&)[ \t]*', '\\1 \\2'),
    #to#('(bool|char|const|delete|int|stream|unsigned|void|([A-Z]\w*)|[,])[ \n\t]*(\*|&)[ \t]*', '\\1 \\3'),
    # pointer with template
    ('(( *((bool|char|const|delete|int|stream|unsigned|void|size_t|class[ \t]+\w*|[A-Z]\w*|\w+::\w+|[,])\s*[\*&],*)+)>) *(\*|&) *', '\\1 \\5'),
    #to#('(( *((bool|char|delete|int|stream|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*)|[,])[ \*&],*)+)>)[ \t\n]*(\*|&) *', '\\1 \\7'),
    # unary pointer, minus, not
    ('(return|=|&&|\|\|) (\*|&|-|!) ([\w\(])', '\\1 \\2\\3'),
    # space after `operator'
    ('(\Woperator) *([^\w\s])', '\\1 \\2'),
    # dangling brace close
    ('\n[ \t]*(\n[ \t]*})', '\\1'),
    # dangling newline
    ('\n[ \t]*\n[ \t]*\n', '\n\n'),
    # dangling parenthesis open
    #('[ \t]*\n[ \t]*\([ \t]*\n', '('),
    ('\([ \t]*\n', '('),
    # dangling parenthesis close
    ('\n[ \t]*\)', ')'),
    # dangling comma
    ('\n[ \t]*,', ','),
    # dangling semicolon
    ('\n[ \t]*;', ';'),
    # brace open
    ('(\w)[ \t]*([^\s]*){([ \t]*\n)', '\\1\\2\n{\n'),
    # brace open backslash
    ('(\w[^\n]*){[ \t]*\\\\\n', '\\1\\\n{\\\n'),
    # brace close
    ("}[ \t]*([^'\n]*\w[^\n\\\]*)\n", '}\n\\1\n'),
    # brace close backslash
    ("}[ \t]*([^'\n]*\w[^\n\\\]*)", '\n}\n\\1'),
    # delete space after `operator'
    #('(\Woperator) (\W)', '\\1\\2'),
    # delete space after case, label
    ('(\W(case|label) ([\w]+)) :', '\\1:'),
    # delete space before comma
    ('[ \t]*,', ','),
    # delete space before semicolon
    ('[ \t]*;', ';'),
    # delete space before eol-backslash
    ('[ \t]*\\\\\n', '\\\n'),
    # delete trailing whitespace
    ('[ \t]*\n', '\n'),

    ## Deuglify code that also gets ugly by rules above.
    # delete newline after typedef struct
    ('(typedef struct\s+([\w]*\s){([^}]|{[^}]*})*})\s*\n\s*(\w[\w\d]*;)', '\\1 \\4'),
    # delete spaces around template brackets
    #('(dynamic_cast|template|([A-Z]\w*))[ \t]*<[ \t]*(( *(bool|char|int|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*)),?)+)[ \t]?(| [\*&])[ \t]*>', '\\1<\\3\\8>'),
    ('(dynamic_cast|less|list|map|set|template|typedef|vector|\w+::\w+|[A-Z]\w*)[ \t]*<[ \t]*(( *(bool|char|const|string|int|unsigned|void|vsize|size_t|class[ \t]+\w*|[A-Z]\w*)( *[\*&]?,|[\*&])*)+)[ \t]?(| [\*&])[ \t]*>', '\\1<\\2\\6>'),
    ('(\w+::\w+|[A-Z]\w*) < ((\w+::\w+|[A-Z]\w*)<[A-Z]\w*>) >', '\\1<\\2 >'),
    ('((if|while)\s+\(([^\)]|\([^\)]*\))*\))\s*;', '\\1\n;'),
    ('(for\s+\(([^;]*;[^;]*;([^\)]|\([^\)]*\))*)\))\s*;', '\\1\n;'),
    # do {..} while
    ('(}\s*while\s*)(\(([^\)]|\([^\)]*\))*\))\s*;', '\\1\\2;'),

    ## Fix code that gets broken by rules above.
    ##('->\s+\*', '->*'),
    # delete space before #define x()
    ('#[ \t]*define (\w*)[ \t]*\(', '#define \\1('),
    # add space in #define x ()
    ('#[ \t]*define (\w*)(\(([^\(\)]|\([^\(\)]*\))*\)\\n)',
     '#define \\1 \\2'),
    # delete space in #include <>
    ('#[ \t]*include[ \t]*<[ \t]*([^ \t>]*)[ \t]*(/?)[ \t]*([^ \t>]*)[ \t]*>',
     '#include <\\1\\2\\3>'),
    # delete backslash before empty line (emacs' indent region is broken)
    ('\\\\\n\n', '\n\n'),
    ],

    COMMENT:
    [
    # delete trailing whitespace
    ('[ \t]*\n', '\n'),
    # delete empty first lines
    ('(/\*\n)\n*', '\\1'),
    # delete empty last lines
    ('\n*(\n\*/)', '\\1'),
    ## delete newline after start?
    #('/(\*)\n', '\\1'),
    ## delete newline before end?
    #('\n(\*/)', '\\1'),
    ],
    }
182 # Recognize special sequences in the input.
184 # (?P<name>regex) -- Assign result of REGEX to NAME.
185 # *? -- Match non-greedily.
186 # (?m) -- Multiline regex: Make ^ and $ match at each line.
187 # (?s) -- Make the dot match all characters including newline.
188 # (?x) -- Ignore whitespace in patterns.
# A regex that can never match: a word boundary cannot sit between two
# word characters.  It has to be a raw string, otherwise `\b' is the
# backspace character and the pattern matches the literal text a<BS>a.
no_match = r'a\ba'
# Regexes that recognise the pieces of a source file which must not be
# treated as plain code, keyed by format and then by snippet type.  Every
# pattern captures the text to set aside in the named group `match';
# find_toplevel_snippets () relies on that group name.
snippet_res = {
    CXX: {
        'multiline_comment':
        r'''(?sx)
    (?P<match>
      (?P<code>
      [ \t]*/\*.*?\*/))''',

        # NOTE(review): check against the upstream file that no pattern
        # line is missing between the flags and (?P<match> here.
        'singleline_comment':
        r'''(?mx)
    (?P<match>
      (?P<code>
      [ \t]*//([ \t][^\n]*|)\n))''',

        'string':
        r'''(?x)
    (?P<match>
      (?P<code>
      "([^\"\n](\")*)*"))''',

        'char':
        r'''(?x)
    (?P<match>
      (?P<code>
      '([^']+|\')))''',

        # Under (?x) a bare `#' starts a regex comment, so it is escaped;
        # both groups are now closed, which lets the pattern compile.
        'include':
        r'''(?x)
    (?P<match>
      (?P<code>
      \#[ \t]*include[ \t]*<[^>]*>))''',
        },
    }
class Chunk:
    """A piece of the input file.

    Subclasses override replacement_text () to say what the piece looks
    like after nitpicking; the base class contributes no text at all.
    """
    def replacement_text (self):
        """Return the text that replaces this chunk (empty here)."""
        return ''

    def filter_text (self):
        """Return the text to emit for this chunk in the output file."""
        return self.replacement_text ()
class Substring (Chunk):
    """Plain code between two snippets: the slice SOURCE[START:END]."""
    def __init__ (self, source, start, end):
        self.source = source
        self.start = start
        self.end = end

    def replacement_text (self):
        """Return the slice after applying every rules[CXX] rewrite in order."""
        s = self.source[self.start:self.end]
        if verbose_p:
            sys.stderr.write ('CXX Rules')
        for (regex, replacement) in rules[CXX]:
            # one progress dot per rule
            if verbose_p:
                sys.stderr.write ('.')
            s = re.sub (regex, replacement, s)
        if verbose_p:
            sys.stderr.write ('done\n')
        return s
class Snippet (Chunk):
    """A region found by one of the snippet regexes; emitted unchanged.

    TYPE is the snippet type name, MATCH the regex match object (it must
    have a group named `match') and FORMAT the format key.
    """
    def __init__ (self, type, match, format):
        self.type = type
        self.match = match
        self.hash = 0
        self.options = []
        self.format = format

    def replacement_text (self):
        """Return the matched text verbatim."""
        return self.match.group ('match')

    def substring (self, s):
        """Return group S of the underlying match."""
        return self.match.group (s)

    def __repr__ (self):
        # repr () instead of backticks: same result, valid in Python 3 too.
        return repr (self.__class__) + ' type = ' + self.type
class Multiline_comment (Snippet):
    """A /* ... */ comment; cleaned up with the rules[COMMENT] rewrites."""
    def __init__ (self, source, match, format):
        # find_toplevel_snippets () passes the snippet type as the first
        # argument, so SOURCE is what has to end up in self.type.  (The
        # bare name `type' here would store the builtin and make
        # __repr__ fail.)
        Snippet.__init__ (self, source, match, format)

    def replacement_text (self):
        """Return the comment text after applying every COMMENT rule in order."""
        s = self.match.group ('match')
        if verbose_p:
            sys.stderr.write ('COMMENT Rules')
        for (regex, replacement) in rules[COMMENT]:
            # one progress dot per rule
            if verbose_p:
                sys.stderr.write ('.')
            s = re.sub (regex, replacement, s)
        return s
# Snippet types that need their own class; find_toplevel_snippets () uses
# plain Snippet for every type that is not listed here.
snippet_type_to_class = {
    'multiline_comment': Multiline_comment,
    #        'string': Multiline_comment,
    #        'include': Include_snippet,
    }
def find_toplevel_snippets (s, types):
    """Split S into an ordered list of chunks.

    TYPES is a sequence of snippet type names, each a key of
    snippet_res[format].  The result alternates Substring objects (text
    between matches, possibly empty) with Snippet objects (or the class
    registered in snippet_type_to_class) and ends with the Substring for
    whatever follows the last match.
    """
    if verbose_p:
        sys.stderr.write ('Dissecting')

    res = {}
    for i in types:
        res[i] = re.compile (snippet_res[format][i])

    snippets = []
    index = 0
    # Next known match for each type: None, or (start offset, snippet).
    # Built eagerly; a map () call used only for its side effects never
    # runs on Python 3, which would leave this dictionary empty.
    found = dict.fromkeys (types)

    # We want to search for multiple regexes, without searching
    # the string multiple times for one regex.
    # Hence, we use earlier results to limit the string portion
    # where we search.

    while 1:
        if verbose_p:
            sys.stderr.write ('.')
        first = None
        endex = 1 << 30
        for kind in types:
            # Search again only if the remembered match is gone or lies
            # before the current position.
            if not found[kind] or found[kind][0] < index:
                found[kind] = None
                # Offsets in M are relative to INDEX because a slice is
                # searched.  The slice is kept on purpose: anchors behave
                # differently with the pos/endpos arguments.
                m = res[kind].search (s[index:endex])
                if not m:
                    continue

                cl = snippet_type_to_class.get (kind, Snippet)
                snip = cl (kind, m, format)
                start = index + m.start ('match')
                found[kind] = (start, snip)

            if found[kind] \
               and (not first \
                    or found[kind][0] < found[first][0]):
                first = kind

                # FIXME.

                # Limiting the search space is a cute
                # idea, but this *requires* to search
                # for possible containing blocks
                # first, at least as long as we do not
                # search for the start of blocks, but
                # always/directly for the entire
                # @block ... @end block.

                endex = found[first][0]

        if not first:
            # Nothing left to set aside: the rest is plain code.
            snippets.append (Substring (s, index, len (s)))
            break

        (start, snip) = found[first]
        snippets.append (Substring (s, index, start))
        snippets.append (snip)
        found[first] = None
        index = start + len (snip.match.group ('match'))

    return snippets
def nitpick_file (outdir, file):
    """Apply all rewrite rules to FILE and re-indent the result.

    If the text changes it is written either over FILE (after renaming
    the original to FILE~) when OUTDIR is false, or to a file of the same
    base name inside OUTDIR.  indent_file () then runs on the written
    file; with the --indent flag it also runs when nothing changed.
    """
    with open (file) as h:
        original = h.read ()

    s = original
    for (regex, replacement) in rules[GLOBAL_CXX]:
        s = re.sub (regex, replacement, s)

    # FIXME: Containing blocks must be first, see
    #        find_toplevel_snippets.
    #        We leave simple strings be part of the code
    snippet_types = (
        'multiline_comment',
        'singleline_comment',
        'string',
        #                'char',
        )

    chunks = find_toplevel_snippets (s, snippet_types)

    t = ''.join ([x.filter_text () for x in chunks])
    fixt = file
    # Compare with what is on disk, not with the text after the global
    # rules; otherwise a change made by those rules alone is never saved.
    changed = (t != original)
    if changed:
        if not outdir:
            # Keep a backup.  os.rename () copes with spaces and shell
            # metacharacters in the name, unlike running `mv' in a shell.
            os.rename (file, file + '~')
        else:
            fixt = os.path.join (outdir,
                                 os.path.basename (file))
        with open (fixt, "w") as h:
            h.write (t)
    if changed or indent_p:
        indent_file (fixt)
def indent_file (file):
    """Re-indent FILE in place with a batch-mode Emacs.

    Emacs visits FILE, switches to c++-mode, indents the whole buffer and
    saves it if that modified anything.  The exit status is ignored.
    """
    # Imported here because the file's import block does not provide it.
    import subprocess

    lisp = '''(let ((error nil)
      (version-control nil))
  (load-library "cc-mode")
  (c++-mode)
  (indent-region (point-min) (point-max))
  (if (buffer-modified-p (current-buffer))
      (save-buffer)))'''
    # An argument list instead of a shell string, so that file names with
    # spaces or quotes reach Emacs unharmed.
    command = ['emacs',
               '--no-window-system',
               '--batch',
               '--no-site-file',
               '--no-init-file',
               file,
               '--eval', lisp]
    if verbose_p:
        sys.stderr.write (' '.join (command))
        sys.stderr.write ('\n')
    subprocess.call (command)
def usage ():
    """Write the command-line help to standard output."""
    sys.stdout.write (r'''
Usage:
fixcc [OPTION]... FILE...

Options:
  --help
  --indent         reindent, even if no changes
  --outdir=DIR     write changed files to DIR, leave the originals alone
  --verbose
  --test

Typical use with LilyPond:

   fixcc $(find flower kpath-guile lily -name '*cc' -o -name '*hh' | grep -v /out)

''')
def do_options ():
    """Parse sys.argv, set the option globals and return the file names.

    Sets indent_p, outdir and verbose_p.  --help prints the usage and
    exits with status 0; --test runs the self test and exits with status
    0 as soon as it is seen.  A bad option or a missing FILE argument
    prints the usage and exits with status 2.
    """
    global indent_p, outdir, verbose_p
    try:
        (options, files) = getopt.getopt (sys.argv[1:], '',
                                          ['help', 'indent', 'outdir=',
                                           'test', 'verbose'])
    except getopt.GetoptError as e:
        # Same treatment as a missing FILE argument, not a traceback.
        sys.stderr.write ('fixcc: %s\n' % e)
        usage ()
        sys.exit (2)
    for (o, a) in options:
        if o == '--help':
            usage ()
            sys.exit (0)
        elif o == '--indent':
            indent_p = 1
        elif o == '--outdir':
            outdir = a
        elif o == '--verbose':
            verbose_p = 1
        elif o == '--test':
            test ()
            sys.exit (0)
        else:
            # Only reachable if an option is added to the getopt list
            # above without a branch here.
            raise AssertionError ('unimplemented option: %s' % o)
    if not files:
        usage ()
        sys.exit (2)
    return files
# Output directory; false means: rewrite the files in place (--outdir).
outdir = 0
# Key into snippet_res that selects the set of snippet regexes.
format = CXX
# Location of the Emacs server socket used by setup_client (); the name
# contains the process id of this script.
socketdir = '/tmp/fixcc'
socketname = 'fixcc%d' % os.getpid ()
def setup_client ():
    """Start a background Emacs server and wait until its socket exists.

    The call in main () is commented out there as not working yet.
    """
    socket_path = os.path.join (socketdir, socketname)
    # Remove a stale socket only if there is one, and create the socket
    # directory only if it is missing, so that both the first and any
    # later run get past this point.
    if os.path.exists (socket_path):
        os.unlink (socket_path)
    if not os.path.isdir (socketdir):
        # 0o700 is the spelling of octal 700 that Python 3 accepts as well
        os.mkdir (socketdir, 0o700)
    # These two options are not passed to this Emacs:
    #--no-window-system\
    #--batch\
    # NOTE(review): `(while t)' never ends and has no body, so the
    # `(sleep 1000)' after it is never reached; presumably something like
    # (while t (sleep-for 1000)) was meant -- confirm before enabling.
    lisp = '''(let ((error nil)
      (version-control nil))
  (load-library "server")
  (setq server-socket-dir "%(socketdir)s")
  (setq server-name "%(socketname)s")
  (server-start)
  (while t) (sleep 1000))''' \
    % { 'socketdir' : socketdir,
        'socketname' : socketname, }
    emacs = "emacs --no-site-file --no-init-file --eval '%s' " % lisp

    if not os.fork ():
        # child: run the server, then leave
        os.system (emacs)
        sys.exit (0)
    # parent: poll once a second until the server has created its socket
    while not os.path.exists (socket_path):
        time.sleep (1)
def main ():
    """Nitpick every file named on the command line."""
    #emacsclient should be faster, but this does not work yet
    #setup_client ()
    files = do_options ()
    if outdir and not os.path.isdir (outdir):
        os.makedirs (outdir)
    for name in files:
        # progress report, one line per file
        sys.stderr.write ('%s...\n' % name)
        nitpick_file (outdir, name)
521 ## TODO: make this compilable and check with g++
522 TEST = '''
523 #include <libio.h>
524 #include <map>
525 class
526 ostream ;
528 class Foo {
529 public: static char* foo ();
530 std::map<char*,int>* bar (char, char) { return 0; }
532 typedef struct
534 Foo **bar;
535 } String;
537 ostream &
538 operator << (ostream & os, String d);
540 typedef struct _t_ligature
542 char *succ, *lig;
543 struct _t_ligature * next;
544 } AFM_Ligature;
546 typedef std::map < AFM_Ligature const *, int > Bar;
549 Copyright (C) 1997--2010 Han-Wen Nienhuys <hanwen@cs.uu.nl>
552 /* ||
553 * vv
554 * !OK OK
556 /* ||
558 !OK OK
560 char *
561 Foo:: foo ()
566 char* a= &++ i ;
567 a [*++ a] = (char*) foe (*i, &bar) *
569 int operator double ();
570 std::map<char*,int> y =*bar(-*a ,*b);
571 Interval_t<T> & operator*= (T r);
572 Foo<T>*c;
573 int compare (Pqueue_ent < K, T > const& e1, Pqueue_ent < K,T> *e2);
574 delete *p;
575 if (abs (f)*2 > abs (d) *FUDGE)
577 while (0);
578 for (; i<x foo(); foo>bar);
579 for (; *p && > y;
580 foo > bar)
582 do {
585 while (foe);
587 squiggle. extent;
588 1 && * unsmob_moment (lf);
589 line_spanner_ = make_spanner ("DynamicLineSpanner", rq ? rq->*self_scm
590 (): SCM_EOL);
591 case foo: k;
593 if (0) {a=b;} else {
594 c=d;
597 cookie_io_functions_t Memory_out_stream::functions_ = {
598 Memory_out_stream::reader,
602 int compare (Array < Pitch> *, Array < Pitch> *);
603 original_ = (Grob *) & s;
604 Drul_array< Link_array<Grob> > o;
607 header_.char_info_pos = (6 + header_length) * 4;
608 return ly_bool2scm (*ma < * mb);
610 1 *::sign(2);
612 (shift) *-d;
614 a = 0 ? *x : *y;
616 a = "foo() 2,2,4";
618 if (!span_)
620 span_ = make_spanner ("StaffSymbol", SCM_EOL);
624 if (!span_)
626 span_ = make_spanner (StaffSymbol, SCM_EOL);
def test ():
    """Write TEST to fixcc.cc, nitpick that file and print it to stdout."""
    test_file = 'fixcc.cc'
    # `with' closes the handles, so the text is on disk before
    # nitpick_file () reads it back.
    with open (test_file, 'w') as h:
        h.write (TEST)
    nitpick_file (outdir, test_file)
    with open (test_file) as h:
        sys.stdout.write (h.read ())
# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main ()