LSR: Update.
[lilypond.git] / buildscripts / fixcc.py
bloba3bd8559e8771314dc9061bc11124c600dca2fcc
1 #!/usr/bin/python
3 # fixcc -- nitpick lily's c++ code
5 # TODO
6 # * maintainable rules: regexp's using whitespace (?x) and match names
7 # <identifier>)
8 # * trailing `*' vs. function definition
9 # * do not break/change indentation of fixcc-clean files
10 # * check lexer, parser
11 # * rewrite in elisp, add to cc-mode
12 # * using regexes is broken by design
13 # * ?
14 # * profit
16 import __main__
17 import getopt
18 import os
19 import re
20 import string
21 import sys
22 import time
# Keys naming the kind of text a rule list or snippet regex applies to.
COMMENT = 'COMMENT'
STRING = 'STRING'
GLOBAL_CXX = 'GC++'
CXX = 'C++'

# Command-line switches; do_options () overwrites them.
verbose_p = 0
indent_p = 0

# Rewrite rules, grouped by the kind of text they apply to.
#
# Every rule is a (PATTERN, REPLACEMENT) pair handed to re.sub (), and the
# rules of one group are applied in list order:
#
#   GLOBAL_CXX -- applied by nitpick_file () to the whole file.
#   CXX        -- applied by Substring.replacement_text () to code chunks.
#   COMMENT    -- applied by Multiline_comment.replacement_text ().
#
# The literals are deliberately not raw strings: `\n' and `\t' are real
# newline/tab characters, and sequences such as `\s' or `\(' only reach
# the regex engine because Python leaves unknown escapes alone.
# NOTE(review): for the same reason the `\b' in the "gratuitous block"
# patterns is a backspace character, not a word boundary -- confirm intent
# before converting anything here to raw strings.
rules = {
    GLOBAL_CXX:
    [
    # delete gratuitous block
    ('''\n( |\t)\s*{\n\s*(.*?)(?![{}]|\b(do|for|else|if|switch|while)\b);\n\s*}''',
     '\n\\2;'),
    ],
    CXX:
    [
    # space before parenthesis open
    ('([^\( \]])[ \t]*\(', '\\1 ('),
    # space after comma
    # (fixed: the group was written with escaped parentheses and the
    # replacement was the control character `\1' instead of a group
    # reference, so the rule never inserted the space.)
    ("([^'],)[ \t]*", '\\1 '),
    # delete gratuitous block
    ('''\n( |\t)\s*{\n\s*(.*?)(?![{}]|\b(do|for|else|if|switch|while)\b);\n\s*}''',
     '\n\\2;'),
    # delete inline tabs
    ('(\w)\t+', '\\1 '),
    # delete inline double spaces
    # (fixed: the pattern must require at least two spaces; a pattern that
    # can match the empty string inserts a space between all characters.)
    ('  *', ' '),
    # delete space after parenthesis open
    ('\([ \t]*', '('),
    # delete space before parenthesis close
    ('[ \t]*\)', ')'),
    # delete spaces after prefix
    ('(--|\+\+)[ \t]*([\w\)])', '\\1\\2'),
    # delete spaces before postfix
    ('([\w\)\]])[ \t]*(--|\+\+)', '\\1\\2'),
    # delete space after parenthesis close
    #('\)[ \t]*([^\w])', ')\\1'),
    # delete space around operator
    # ('([\w\(\)\]])([ \t]*)(::|\.)([ \t]*)([\w\(\)])', '\\1\\3\\5'),
    ('([\w\(\)\]])([ \t]*)(\.|->)([ \t]*)([\w\(\)])', '\\1\\3\\5'),
    # delete space after operator
    ('(::)([ \t]*)([\w\(\)])', '\\1\\3'),
    # delete superfluous space around operator
    ('([\w\(\)\]])([ \t]+)(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|\+|-|=|/|:|&|\||\*)([ \t]+)([\w\(\)])', '\\1 \\3 \\5'),
    # space around operator1
    ('([\w\)\]]) *(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|=|/|:|&|\||\*) *([\w\(])', '\\1 \\2 \\3'),
    # space around operator2
    ('([\w\)\]]) *(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|=|/|:|&|\||\*) ([^\w\s])', '\\1 \\2 \\3'),
    # space around operator3
    ('([^\w\s]) (&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|[^-]>|=|/|:|&|\||\*) *([\w\(])', '\\1 \\2 \\3'),
    # space around operator4
    ('([\w\(\)\]]) (\*|/|\+|-) *([-:])', '\\1 \\2 \\3'),
    # space around +/-; exponent
    ('([\w\)\]])(\+|-)([_A-Za-z\(])', '\\1 \\2 \\3'),
    ('([_\dA-Za-df-z\)\]])(\+|-)([\w\(])', '\\1 \\2 \\3'),
    # trailing operator
    (' (::|&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|\+|-|=|/|:|&XXX|\||\*XXX)[ \t]*\n([ \t]*)', '\n\\2\\1 '),
    # pointer
    ##('(bool|char|const|delete|int|stream|unsigned|void|size_t|struct \w+|[A-Z]\w*|,|;|&&|<|[^-]>|\|\||-|\+)[ \t]*(\*|&)[ \t]*', '\\1 \\2'),
    ('(bool|char|const|delete|int|stream|unsigned|void|size_t|struct \w+|[A-Z]\w*|,|;|:|=|\?\)|&&|<|[^-]>|\|\||-|\+)[ \t]*(\*|&)[ \t]*', '\\1 \\2'),
    #to#('(bool|char|const|delete|int|stream|unsigned|void|([A-Z]\w*)|[,])[ \n\t]*(\*|&)[ \t]*', '\\1 \\3'),
    # pointer with template
    ('(( *((bool|char|const|delete|int|stream|unsigned|void|size_t|class[ \t]+\w*|[A-Z]\w*|\w+::\w+|[,])[ \*&],*)+)>) *(\*|&) *', '\\1 \\5'),
    #to#('(( *((bool|char|delete|int|stream|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*)|[,])[ \*&],*)+)>)[ \t\n]*(\*|&) *', '\\1 \\7'),
    # unary pointer, minus, not
    ('(return|=) (\*|&|-|!) ([\w\(])', '\\1 \\2\\3'),
    # space after `operator'
    ('(\Woperator) *([^\w\s])', '\\1 \\2'),
    # dangling brace close
    ('\n[ \t]*(\n[ \t]*})', '\\1'),
    # dangling newline
    ('\n[ \t]*\n[ \t]*\n', '\n\n'),
    # dangling parenthesis open
    #('[ \t]*\n[ \t]*\([ \t]*\n', '('),
    ('\([ \t]*\n', '('),
    # dangling parenthesis close
    ('\n[ \t]*\)', ')'),
    # dangling comma
    ('\n[ \t]*,', ','),
    # dangling semicolon
    ('\n[ \t]*;', ';'),
    # brace open
    ('(\w)[ \t]*([^\s]*){([ \t]*\n)', '\\1\\2\n{\n'),
    # brace open backslash
    ('(\w[^\n]*){[ \t]*\\\\\n', '\\1\\\n{\\\n'),
    # brace close
    ("}[ \t]*([^'\n]*\w[^\n\\\]*)\n", '}\n\\1\n'),
    # brace close backslash
    ("}[ \t]*([^'\n]*\w[^\n\\\]*)", '\n}\n\\1'),
    # delete space after `operator'
    #('(\Woperator) (\W)', '\\1\\2'),
    # delete space after case, label
    ('(\W(case|label) ([\w]+)) :', '\\1:'),
    # delete space before comma
    ('[ \t]*,', ','),
    # delete space before semicolon
    ('[ \t]*;', ';'),
    # delete space before eol-backslash
    ('[ \t]*\\\\\n', '\\\n'),
    # delete trailing whitespace
    ('[ \t]*\n', '\n'),

    ## Deuglify code that also gets ugly by rules above.
    # delete newline after typedef struct
    ('(typedef struct\s+([\w]*\s){([^}]|{[^}]*})*})\s*\n\s*(\w[\w\d]*;)', '\\1 \\4'),
    # delete spaces around template brackets
    #('(dynamic_cast|template|([A-Z]\w*))[ \t]*<[ \t]*(( *(bool|char|int|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*)),?)+)[ \t]?(| [\*&])[ \t]*>', '\\1<\\3\\8>'),
    ('(dynamic_cast|template|typedef|\w+::\w+|[A-Z]\w*)[ \t]*<[ \t]*(( *(bool|char|const|int|unsigned|void|size_t|class[ \t]+\w*|[A-Z]\w*)( *[\*&]?,|[\*&])*)+)[ \t]?(| [\*&])[ \t]*>', '\\1<\\2\\6>'),
    ('(\w+::\w+|[A-Z]\w*) < ((\w+::\w+|[A-Z]\w*)<[A-Z]\w*>) >', '\\1<\\2 >'),
    ('((if|while)\s+\(([^\)]|\([^\)]*\))*\))\s*;', '\\1\n;'),
    ('(for\s+\(([^;]*;[^;]*;([^\)]|\([^\)]*\))*)\))\s*;', '\\1\n;'),
    # do {..} while
    ('(}\s*while\s*)(\(([^\)]|\([^\)]*\))*\))\s*;', '\\1\\2;'),

    ## Fix code that gets broken by rules above.
    ##('->\s+\*', '->*'),
    # delete space before #define x()
    ('#[ \t]*define (\w*)[ \t]*\(', '#define \\1('),
    # add space in #define x ()
    ('#[ \t]*define (\w*)(\(([^\(\)]|\([^\(\)]*\))*\)\\n)',
     '#define \\1 \\2'),
    # delete space in #include <>
    ('#[ \t]*include[ \t]*<[ \t]*([^ \t>]*)[ \t]*(/?)[ \t]*([^ \t>]*)[ \t]*>',
     '#include <\\1\\2\\3>'),
    # delete backslash before empty line (emacs' indent region is broken)
    ('\\\\\n\n', '\n\n'),
    ],

    COMMENT:
    [
    # delete trailing whitespace
    ('[ \t]*\n', '\n'),
    # delete empty first lines
    ('(/\*\n)\n*', '\\1'),
    # delete empty last lines
    ('\n*(\n\*/)', '\\1'),
    ## delete newline after start?
    #('/(\*)\n', '\\1'),
    ## delete newline before end?
    #('\n(\*/)', '\\1'),
    ],
    }
167 # Recognize special sequences in the input.
169 # (?P<name>regex) -- Assign result of REGEX to NAME.
170 # *? -- Match non-greedily.
171 # (?m) -- Multiline regex: Make ^ and $ match at each line.
172 # (?s) -- Make the dot match all characters including newline.
173 # (?x) -- Ignore whitespace in patterns.
# no_match is not referenced anywhere else in the visible source.  The
# literal is not a raw string, so `\b' is a backspace character here.
174 no_match = 'a\ba'
# snippet_res maps a format key (CXX) to {snippet type: regex source}.
# find_toplevel_snippets () looks entries up as snippet_res[format][type]
# and compiles only the types it is asked for.  Each pattern exposes the
# snippet text through the named group `match', which Snippet reads.
# NOTE(review): in this view some lines of the literal (for instance the
# closing braces) are missing -- check against the real file before
# editing any pattern.
175 snippet_res = {
176 CXX: {
177 'multiline_comment':
178 r'''(?sx)
179 (?P<match>
180 (?P<code>
181 [ \t]*/\*.*?\*/))''',
183 'singleline_comment':
184 r'''(?mx)
186 (?P<match>
187 (?P<code>
188 [ \t]*//([ \t][^\n]*|)\n))''',
190 'string':
191 r'''(?x)
192 (?P<match>
193 (?P<code>
194 "([^\"\n](\")*)*"))''',
# 'char' is not requested by nitpick_file () (it is commented out there).
196 'char':
197 r'''(?x)
198 (?P<match>
199 (?P<code>
200 '([^']+|\')))''',
# NOTE(review): as written, the 'include' pattern leaves both named groups
# unclosed, and under (?x) the unescaped `#' starts a regex comment, so
# compiling it would presumably fail.  nitpick_file () never requests this
# type, so it is not compiled today -- confirm before enabling it.
202 'include':
203 r'''(?x)
204 (?P<match>
205 (?P<code>
206 "#[ \t]*include[ \t]*<[^>]*>''',
class Chunk:
    """Base class for one piece of the dissected input.

    A chunk knows how to render itself for the filtered output.  The base
    implementation contributes no text; subclasses override
    replacement_text ().
    """

    def replacement_text (self):
        """Return the text that replaces this chunk (always empty here)."""
        return ''

    def filter_text (self):
        """Return the text this chunk contributes to the output."""
        text = self.replacement_text ()
        return text
class Substring (Chunk):
    """A stretch of SOURCE, [START, END), that lies between two snippets.

    This is the text that is treated as C++ code: rendering it runs every
    rule in rules[CXX] over it.
    """

    def __init__ (self, source, start, end):
        self.source = source
        self.start = start
        self.end = end

    def replacement_text (self):
        """Return the slice after applying all rules[CXX] in list order."""
        text = self.source[self.start:self.end]
        if verbose_p:
            sys.stderr.write ('CXX Rules')
        for (pattern, replacement) in rules[CXX]:
            # One progress dot per rule when running verbosely.
            if verbose_p:
                sys.stderr.write ('.')
            text = re.sub (pattern, replacement, text)
        if verbose_p:
            sys.stderr.write ('done\n')
        return text
class Snippet (Chunk):
    """A recognized special sequence (comment, string, ...) in the input.

    TYPE is the snippet type name used as key in snippet_res, MATCH the
    regex match object that found the snippet, and FORMAT the format key
    the regex was taken from.  A plain Snippet is copied to the output
    unchanged.
    """

    def __init__ (self, type, match, format):
        self.type = type
        self.match = match
        self.hash = 0
        self.options = []
        self.format = format

    def replacement_text (self):
        """Return the matched text verbatim (named group `match')."""
        return self.match.group ('match')

    def substring (self, s):
        """Return the text of the named group S of the match."""
        return self.match.group (s)

    def __repr__ (self):
        # repr () instead of backquotes: backquotes are Python 2-only syntax.
        return repr (self.__class__) + ' type = ' + self.type
class Multiline_comment (Snippet):
    """A /* ... */ comment; rendering applies the rules[COMMENT] rules."""

    def __init__ (self, source, match, format):
        # SOURCE receives the snippet type name: find_toplevel_snippets ()
        # instantiates every snippet class as cl (type, match, format).
        # The previous code stored the *builtin* `type' in self.type,
        # which made __repr__ fail when concatenating it to a string.
        Snippet.__init__ (self, source, match, format)

    def replacement_text (self):
        """Return the comment text after applying all rules[COMMENT]."""
        s = self.match.group ('match')
        if verbose_p:
            sys.stderr.write ('COMMENT Rules')
        for (pattern, replacement) in rules[COMMENT]:
            if verbose_p:
                sys.stderr.write ('.')
            s = re.sub (pattern, replacement, s)
        return s
# Snippet types that are rendered by a dedicated class.  Types without an
# entry are wrapped in a plain Snippet by find_toplevel_snippets ().
snippet_type_to_class = dict (
    multiline_comment = Multiline_comment,
    # string = Multiline_comment,
    # include = Include_snippet,
    )
def find_toplevel_snippets (s, types):
    """Split S into plain code and recognized snippets.

    TYPES is a sequence of snippet type names; each must be a key of
    snippet_res[format].  The result is a list of chunks in input order:
    Substring objects for the text between snippets and Snippet objects
    (or the class registered in snippet_type_to_class) for the snippets.
    The list always ends with a Substring covering the remaining text.
    """
    if verbose_p:
        sys.stderr.write ('Dissecting')

    # Compiled regex per type, and the pending match per type as a
    # (start, snippet) pair -- None while there is no usable match.
    # (A plain loop: map () used for its side effect does nothing where
    # map is lazy, which left `found' without keys.)
    res = {}
    found = {}
    for i in types:
        res[i] = re.compile (snippet_res[format][i])
        found[i] = None

    snippets = []
    index = 0

    # We want to search for multiple regexes, without searching
    # the string multiple times for one regex.
    # Hence, we use earlier results to limit the string portion
    # where we search.
    # Since every part of the string is traversed at most once for
    # every type of snippet, this is linear.

    while 1:
        if verbose_p:
            sys.stderr.write ('.')
        first = None
        endex = 1 << 30
        for kind in types:
            if not found[kind] or found[kind][0] < index:
                # No match yet, or the old one starts before the current
                # position: search again from INDEX.
                found[kind] = None
                m = res[kind].search (s[index:endex])
                if not m:
                    continue

                cl = snippet_type_to_class.get (kind, Snippet)
                snip = cl (kind, m, format)
                # M is relative to the slice; store the absolute start.
                start = index + m.start ('match')
                found[kind] = (start, snip)

            if found[kind] \
               and (not first \
                    or found[kind][0] < found[first][0]):
                first = kind

                # FIXME.

                # Limiting the search space is a cute
                # idea, but this *requires* to search
                # for possible containing blocks
                # first, at least as long as we do not
                # search for the start of blocks, but
                # always/directly for the entire
                # @block ... @end block.

                endex = found[first][0]

        if not first:
            # Nothing left to recognize: the rest is plain code.
            snippets.append (Substring (s, index, len (s)))
            break

        (start, snip) = found[first]
        snippets.append (Substring (s, index, start))
        snippets.append (snip)
        found[first] = None
        index = start + len (snip.match.group ('match'))

    return snippets
def nitpick_file (outdir, file):
    """Apply all rewrite rules to FILE and re-indent the result.

    If the rewritten text differs from the file's contents it is written
    back: in place (after renaming the original to FILE~) when OUTDIR is
    false, otherwise to OUTDIR/<basename of FILE>.  The written file --
    or FILE itself when nothing changed but indent_p is set -- is then
    passed to indent_file ().
    """
    h = open (file)
    try:
        original = h.read ()
    finally:
        h.close ()

    s = original
    for (pattern, replacement) in rules[GLOBAL_CXX]:
        s = re.sub (pattern, replacement, s)

    # FIXME: Containing blocks must be first, see
    #        find_toplevel_snippets.
    #        We leave simple strings be part of the code
    snippet_types = (
        'multiline_comment',
        'singleline_comment',
        'string',
        # 'char',
        )

    chunks = find_toplevel_snippets (s, snippet_types)

    t = ''.join ([chunk.filter_text () for chunk in chunks])

    # Compare with what is on disk, not with the text after the global
    # rules: otherwise a change made only by a global rule is dropped.
    changed = original != t

    fixt = file
    if changed:
        if not outdir:
            # Keep a backup.  os.rename avoids building a shell command
            # from the file name.
            os.rename (file, file + '~')
        else:
            fixt = os.path.join (outdir,
                                 os.path.basename (file))
        h = open (fixt, "w")
        try:
            h.write (t)
        finally:
            h.close ()
    if changed or indent_p:
        indent_file (fixt)
def indent_file (file):
    """Re-indent FILE in place by running Emacs' c++-mode in batch mode.

    Emacs is started directly with an argument list (no shell), so file
    names containing spaces or shell metacharacters are passed through
    intact.  The exit status of Emacs is ignored; if Emacs cannot be
    started at all a message is written to stderr.
    """
    import subprocess

    lisp = '''(let ((error nil)
      (version-control nil))
  (load-library "cc-mode")
  (c++-mode)
  (indent-region (point-min) (point-max))
  (if (buffer-modified-p (current-buffer))
      (save-buffer)))'''
    command = ['emacs',
               '--no-window-system',
               '--batch',
               '--no-site-file',
               '--no-init-file',
               file,
               '--eval', lisp]
    if verbose_p:
        sys.stderr.write (' '.join (command))
        sys.stderr.write ('\n')
    try:
        subprocess.call (command)
    except OSError:
        # os.system () used to report a missing emacs without raising.
        sys.stderr.write ('fixcc: cannot run emacs: %s\n'
                          % sys.exc_info ()[1])
def usage ():
    """Write the command-line help text to stdout."""
    # --outdir is accepted by do_options () and therefore listed here.
    sys.stdout.write (r'''
Usage:
fixcc [OPTION]... FILE...

Options:
  --help
  --indent       reindent, even if no changes
  --outdir=DIR   write changed files to DIR instead of in place
  --verbose
  --test

Typical use with LilyPond:

  fixcc $(find flower kpath-guile lily -name '*cc' -o -name '*hh' | grep -v /out)

This script is licensed under the GNU GPL
''')
def do_options ():
    """Parse sys.argv and return the list of FILE arguments.

    Sets the module globals indent_p, outdir and verbose_p from the
    options.  --help prints the usage text and exits with status 0;
    --test runs test () and exits with status 0.  An unknown option or a
    missing FILE argument prints the usage text and exits with status 2.
    """
    global indent_p, outdir, verbose_p
    try:
        (options, files) = getopt.getopt (sys.argv[1:], '',
                                          ['help', 'indent', 'outdir=',
                                           'test', 'verbose'])
    except getopt.GetoptError as err:
        # Report a bad command line instead of dying with a traceback.
        sys.stderr.write ('fixcc: %s\n' % err)
        usage ()
        sys.exit (2)

    for (o, a) in options:
        if o == '--help':
            usage ()
            sys.exit (0)
        elif o == '--indent':
            indent_p = 1
        elif o == '--outdir':
            outdir = a
        elif o == '--verbose':
            verbose_p = 1
        elif o == '--test':
            test ()
            sys.exit (0)
        else:
            # Only reached if an option is added to the getopt list above
            # without a branch here.
            raise NotImplementedError ('unhandled option: %s' % o)
    if not files:
        usage ()
        sys.exit (2)
    return files
# Output directory; false means "rewrite files in place".  Overwritten by
# do_options () when --outdir is given.
outdir = 0
# Key into snippet_res used when dissecting input.
format = CXX
# Location of the Emacs server socket used by setup_client () and the
# emacsclient variant of indenting; the name is unique per process.
socketdir = '/tmp/fixcc'
socketname = 'fixcc' + ('%d' % os.getpid ())
def setup_client ():
    """Start a background Emacs server listening on socketdir/socketname.

    Forks a child that runs Emacs through the shell, then polls once per
    second until the server socket exists.  Not called at present: main ()
    has the call commented out.
    """
    #--no-window-system\
    #--batch\
    socket_path = os.path.join (socketdir, socketname)
    # The socket name contains our pid, so normally there is nothing to
    # remove; and the directory survives from earlier runs.  Unconditional
    # unlink/mkdir raised OSError in exactly those common cases.
    if os.path.exists (socket_path):
        os.unlink (socket_path)
    if not os.path.isdir (socketdir):
        # 0o700: owner-only access (the bare 0700 form is Python 2-only).
        os.mkdir (socketdir, 0o700)

    # NOTE(review): `(while t)' has no body and `sleep' does not look like
    # an Emacs Lisp function (sleep-for?) -- possibly why the client mode
    # "does not work yet".  Left unchanged; confirm before fixing.
    emacs = '''emacs\
 --no-site-file\
 --no-init-file\
 --eval '(let ((error nil)
 (version-control nil))
 (load-library "server")
 (setq server-socket-dir "%(socketdir)s")
 (setq server-name "%(socketname)s")
 (server-start)
 (while t) (sleep 1000))' ''' \
    % { 'socketdir' : socketdir,
        'socketname' : socketname, }

    if not os.fork ():
        # Child: run the server, then leave without returning to main ().
        os.system (emacs)
        sys.exit (0)
    # Parent: wait for the server to create its socket.
    while not os.path.exists (socket_path):
        time.sleep (1)
def main ():
    """Command-line entry point: nitpick every FILE given on argv.

    Creates the --outdir directory first when one was requested and it
    does not exist yet, and announces each file on stderr.
    """
    #emacsclient should be faster, but this does not work yet
    #setup_client ()
    files = do_options ()

    must_create_outdir = outdir and not os.path.isdir (outdir)
    if must_create_outdir:
        os.makedirs (outdir)

    for name in files:
        sys.stderr.write ('%s...\n' % name)
        nitpick_file (outdir, name)
507 ## TODO: make this compilable and check with g++
# TEST is the C++ sample used by test (), which writes it to `fixcc.cc'
# and runs nitpick_file () on that file.  Presumably the odd spacing is
# deliberate input for the rules -- confirm before "cleaning it up".
# NOTE(review): in this view some lines of the literal (including its
# closing quotes) are missing; check against the real file before editing.
508 TEST = '''
509 #include <libio.h>
510 #include <map>
511 class
512 ostream ;
514 class Foo {
515 public: static char* foo ();
516 std::map<char*,int>* bar (char, char) { return 0; }
518 typedef struct
520 Foo **bar;
521 } String;
523 ostream &
524 operator << (ostream & os, String d);
526 typedef struct _t_ligature
528 char *succ, *lig;
529 struct _t_ligature * next;
530 } AFM_Ligature;
532 typedef std::map < AFM_Ligature const *, int > Bar;
535 (c) 1997--2007 Han-Wen Nienhuys <hanwen@cs.uu.nl>
538 /* ||
539 * vv
540 * !OK OK
542 /* ||
544 !OK OK
546 char *
547 Foo:: foo ()
552 char* a= &++ i ;
553 a [*++ a] = (char*) foe (*i, &bar) *
555 int operator double ();
556 std::map<char*,int> y =*bar(-*a ,*b);
557 Interval_t<T> & operator*= (T r);
558 Foo<T>*c;
559 int compare (Pqueue_ent < K, T > const& e1, Pqueue_ent < K,T> *e2);
560 delete *p;
561 if (abs (f)*2 > abs (d) *FUDGE)
563 while (0);
564 for (; i<x foo(); foo>bar);
565 for (; *p && > y;
566 foo > bar)
568 do {
571 while (foe);
573 squiggle. extent;
574 1 && * unsmob_moment (lf);
575 line_spanner_ = make_spanner ("DynamicLineSpanner", rq ? rq->*self_scm
576 (): SCM_EOL);
577 case foo: k;
579 if (0) {a=b;} else {
580 c=d;
583 cookie_io_functions_t Memory_out_stream::functions_ = {
584 Memory_out_stream::reader,
588 int compare (Array < Pitch> *, Array < Pitch> *);
589 original_ = (Grob *) & s;
590 Drul_array< Link_array<Grob> > o;
593 header_.char_info_pos = (6 + header_length) * 4;
594 return ly_bool2scm (*ma < * mb);
596 1 *::sign(2);
598 (shift) *-d;
600 a = 0 ? *x : *y;
602 a = "foo() 2,2,4";
604 if (!span_)
606 span_ = make_spanner ("StaffSymbol", SCM_EOL);
610 if (!span_)
612 span_ = make_spanner (StaffSymbol, SCM_EOL);
def test ():
    """Self-test: nitpick the TEST sample and print the result.

    Writes TEST to `fixcc.cc' in the current directory, runs
    nitpick_file () on it and copies the contents of `fixcc.cc' to
    stdout.
    """
    test_file = 'fixcc.cc'

    # Close the file explicitly so the sample is fully on disk before
    # nitpick_file () reads it back.
    h = open (test_file, 'w')
    try:
        h.write (TEST)
    finally:
        h.close ()

    nitpick_file (outdir, test_file)

    h = open (test_file)
    try:
        sys.stdout.write (h.read ())
    finally:
        h.close ()
# Run the command-line driver only when executed as a script, not when
# the module is imported.
if '__main__' == __name__:
    main ()