3 # Thomas Nagy, 2006-2009 (ita)
6 C/C++ preprocessor for finding dependencies
8 Reasons for using the Waf preprocessor by default
9 1. Some c/c++ extensions (Qt) require a custom preprocessor for obtaining the dependencies (.moc files)
10 2. Not all compilers provide .d files for obtaining the dependencies (portability)
11 3. A naive file scanner will not catch the constructs such as "#include foo()"
12 4. A naive file scanner will catch unnecessary dependencies (change an unused header -> recompile everything)
14 Regarding the speed concerns:
15 a. the preprocessing is performed only when files must be compiled
16 b. the macros are evaluated only for #if/#elif/#include
17 c. the time penalty is about 10%
18 d. system headers are not scanned
20 Now if you do not want the Waf preprocessor, the tool "gccdeps" uses the .d files produced
21 during the compilation to track the dependencies (useful when used with the boost libraries).
22 It only works with gcc though, and it cannot be used with Qt builds. A dumb
22 file scanner will be added in the future, so we will have most behaviours.
25 # TODO: more varargs, pragma once
26 # TODO: dumb file scanner tracking all includes
28 import re
, sys
, os
, string
29 import Logs
, Build
, Utils
30 from Logs
import debug
, error
class PreprocError(Utils.WafError):
	"""Exception raised when C/C++ preprocessing fails (parse errors,
	unresolvable includes, recursion limit exceeded, ...)."""
39 recursion_limit
= 5000
40 "do not loop too much on header inclusion"
43 "set to 1 to track headers on files in /usr/include - else absolute paths are ignored"
45 standard_includes
= ['/usr/include']
46 if sys
.platform
== "win32":
47 standard_includes
= []
50 'apply the trigraph rules first'
53 "Keep <> for system includes (do not search for those includes)"
67 "these ops are for c++, to reset, set an empty dict"
69 # ignore #warning and #error
70 re_lines
= re
.compile(\
71 '^[ \t]*(#|%:)[ \t]*(ifdef|ifndef|if|else|elif|endif|include|import|define|undef|pragma)[ \t]*(.*)\r*$',
72 re
.IGNORECASE | re
.MULTILINE
)
74 re_mac
= re
.compile("^[a-zA-Z_]\w*")
75 re_fun
= re
.compile('^[a-zA-Z_][a-zA-Z0-9_]*[(]')
76 re_pragma_once
= re
.compile('^\s*once\s*', re
.IGNORECASE
)
77 re_nl
= re
.compile('\\\\\r*\n', re
.MULTILINE
)
79 r
"""(/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)|//[^\n]*|("(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'|.[^/"'\\]*)""",
81 trig_def
= [('??'+a
, b
) for a
, b
in zip("=-/!'()<>", r
'#~\|^[]{}')]
82 chr_esc
= {'0':0, 'a':7, 'b':8, 't':9, 'n':10, 'f':11, 'v':12, 'r':13, '\\':92, "'":39}
90 tok_types
= [NUM
, STR
, IDENT
, OP
]
92 r
"""0[xX](?P<hex>[a-fA-F0-9]+)(?P<qual1>[uUlL]*)|L*?'(?P<char>(\\.|[^\\'])+)'|(?P<n1>\d+)[Ee](?P<exp0>[+-]*?\d+)(?P<float0>[fFlL]*)|(?P<n2>\d*\.\d+)([Ee](?P<exp1>[+-]*?\d+))?(?P<float1>[fFlL]*)|(?P<n4>\d+\.\d*)([Ee](?P<exp2>[+-]*?\d+))?(?P<float2>[fFlL]*)|(?P<oct>0*)(?P<n0>\d+)(?P<qual2>[uUlL]*)""",
95 r
'%:%:|<<=|>>=|\.\.\.|<<|<%|<:|<=|>>|>=|\+\+|\+=|--|->|-=|\*=|/=|%:|%=|%>|==|&&|&=|\|\||\|=|\^=|:>|!=|##|[\(\)\{\}\[\]<>\?\|\^\*\+&=:!#;,%/\-\?\~\.]',
97 re_clexer
= re
.compile('|'.join(["(?P<%s>%s)" % (name
, part
) for name
, part
in zip(tok_types
, exp_types
)]), re
.M
)
def filter_comments(filename):
	"""Read *filename* and return its preprocessor directives as a list of
	(keyword, line) tuples, after applying the trigraph rules and stripping
	comments and backslash line continuations.

	Fix: the original `code.split(a).join(b)` is invalid — `str.split`
	returns a list, which has no `join` method; the intended idiom is
	`b.join(code.split(a))` (replace every trigraph `a` by `b`).
	"""
	code = Utils.readf(filename)
	# apply the trigraph rules first
	# NOTE(review): applied unconditionally here, matching the visible code —
	# confirm whether an enclosing use_trigraphs flag should guard this loop
	for (a, b) in trig_def:
		code = b.join(code.split(a))
	# remove backslash-newline line continuations
	code = re_nl.sub('', code)
	# strip /* */ and // comments while preserving string literals
	code = re_cpp.sub(repl, code)
	# keyword is group 2 (ifdef/if/include/...), the rest of the line is group 3
	return [(m.group(2), m.group(3)) for m in re.finditer(re_lines, code)]
122 # op -> number, needed for such expressions: #if 1 && 2 != 0
123 ops
= ['* / %', '+ -', '<< >>', '< <= >= >', '== !=', '& | ^', '&& ||', ',']
124 for x
in range(len(ops
)):
126 for u
in syms
.split():
129 def reduce_nums(val_1
, val_2
, val_op
):
130 """apply arithmetic rules and try to return an integer result"""
131 #print val_1, val_2, val_op
133 # now perform the operation, make certain a and b are numeric
135 except TypeError: a
= int(val_1
)
137 except TypeError: b
= int(val_2
)
147 elif d
=='||': c
= int(a
or b
)
149 elif d
=='&&': c
= int(a
and b
)
150 elif d
=='==': c
= int(a
== b
)
151 elif d
=='!=': c
= int(a
!= b
)
152 elif d
=='<=': c
= int(a
<= b
)
153 elif d
=='<': c
= int(a
< b
)
154 elif d
=='>': c
= int(a
> b
)
155 elif d
=='>=': c
= int(a
>= b
)
156 elif d
=='^': c
= int(a^b
)
157 elif d
=='<<': c
= a
<<b
158 elif d
=='>>': c
= a
>>b
163 if not lst
: raise PreprocError("empty list for get_num")
181 raise PreprocError("rparen expected %r" % lst
)
183 (num
, _
) = get_term(lst
[1:i
])
184 return (num
, lst
[i
+1:])
187 return get_num(lst
[1:])
189 num
, lst
= get_num(lst
[1:])
190 return (reduce_nums('-1', num
, '*'), lst
)
192 num
, lst
= get_num(lst
[1:])
193 return (int(not int(num
)), lst
)
195 return (~
int(num
), lst
)
197 raise PreprocError("invalid op token %r for get_num" % lst
)
201 # all macros should have been replaced, remaining identifiers eval to 0
204 raise PreprocError("invalid token %r for get_num" % lst
)
207 if not lst
: raise PreprocError("empty list for get_term")
208 num
, lst
= get_num(lst
)
213 if v
== '&&' and not num
:
215 elif v
== '||' and num
:
219 return get_term(lst
[1:])
236 raise PreprocError("rparen expected %r" % lst
)
239 return get_term(lst
[1:i
])
241 return get_term(lst
[i
+1:])
244 num2
, lst
= get_num(lst
[1:])
247 # no more tokens to process
248 num2
= reduce_nums(num
, num2
, v
)
249 return get_term([(NUM
, num2
)] + lst
)
251 # operator precedence
254 raise PreprocError("op expected %r" % lst
)
256 if prec
[v2
] >= prec
[v
]:
257 num2
= reduce_nums(num
, num2
, v
)
258 return get_term([(NUM
, num2
)] + lst
)
260 num3
, lst
= get_num(lst
[1:])
261 num3
= reduce_nums(num2
, num3
, v2
)
262 return get_term([(NUM
, num
), (p
, v
), (NUM
, num3
)] + lst
)
265 raise PreprocError("cannot reduce %r" % lst
)
def reduce_eval(lst):
	"""take a list of tokens and output true or false (#if/#elif conditions)

	Fix: the fragment visible here never returns a value; callers unpack a
	(token-type, value) pair (`(p, v) = reduce_eval(lst)`), so return the
	reduced numeric result tagged as a NUM token.
	"""
	num, lst = get_term(lst)
	return (NUM, num)
273 """use for converting a list of tokens to a string"""
274 lst
= [str(v2
) for (p2
, v2
) in lst
]
277 def paste_tokens(t1
, t2
):
279 here is what we can paste:
285 if t1
[0] == OP
and t2
[0] == OP
:
287 elif t1
[0] == IDENT
and (t2
[0] == IDENT
or t2
[0] == NUM
):
289 elif t1
[0] == NUM
and t2
[0] == NUM
:
292 raise PreprocError('tokens do not make a valid paste %r and %r' % (t1
, t2
))
293 return (p1
, t1
[1] + t2
[1])
295 def reduce_tokens(lst
, defs
, ban
=[]):
296 """replace the tokens in lst, using the macros provided in defs, and a list of macros that cannot be re-applied"""
302 if p
== IDENT
and v
== "defined":
311 elif p2
== OP
and v2
== '(':
314 del lst
[i
] # remove the ident, and change the ) for the value
320 raise PreprocError("invalid define expression %r" % lst
)
322 elif p
== IDENT
and v
in defs
:
324 if isinstance(defs
[v
], str):
325 a
, b
= extract_macro(defs
[v
])
328 to_add
= macro_def
[1]
330 if isinstance(macro_def
[0], list):
331 # macro without arguments
333 for x
in xrange(len(to_add
)):
334 lst
.insert(i
, to_add
[x
])
337 # collect the arguments for the funcall
343 raise PreprocError("expected '(' after %r (got nothing)" % v
)
346 if p2
!= OP
or v2
!= '(':
347 raise PreprocError("expected '(' after %r" % v
)
357 if p2
== OP
and count_paren
== 0:
359 one_param
.append((p2
, v2
))
362 if one_param
: args
.append(one_param
)
365 if not one_param
: raise PreprocError("empty param in funcall %s" % p
)
366 args
.append(one_param
)
369 one_param
.append((p2
, v2
))
371 one_param
.append((p2
, v2
))
372 if v2
== '(': count_paren
+= 1
373 elif v2
== ')': count_paren
-= 1
375 raise PreprocError('malformed macro')
377 # substitute the arguments within the define expression
379 arg_table
= macro_def
[0]
381 while j
< len(to_add
):
384 if p2
== OP
and v2
== '#':
385 # stringize is for arguments only
386 if j
+1 < len(to_add
) and to_add
[j
+1][0] == IDENT
and to_add
[j
+1][1] in arg_table
:
387 toks
= args
[arg_table
[to_add
[j
+1][1]]]
388 accu
.append((STR
, stringize(toks
)))
391 accu
.append((p2
, v2
))
392 elif p2
== OP
and v2
== '##':
393 # token pasting, how can man invent such a complicated system?
394 if accu
and j
+1 < len(to_add
):
395 # we have at least two tokens
399 if to_add
[j
+1][0] == IDENT
and to_add
[j
+1][1] in arg_table
:
400 toks
= args
[arg_table
[to_add
[j
+1][1]]]
403 accu
[-1] = paste_tokens(t1
, toks
[0]) #(IDENT, accu[-1][1] + toks[0][1])
404 accu
.extend(toks
[1:])
407 accu
.append((p2
, v2
))
409 elif to_add
[j
+1][0] == IDENT
and to_add
[j
+1][1] == '__VA_ARGS__':
411 # first collect the tokens
413 st
= len(macro_def
[0])
415 for x
in args
[pt
-st
+1:]:
417 va_toks
.append((OP
, ','))
418 if va_toks
: va_toks
.pop() # extra comma
423 # remove the token paste
425 if v4
== ',' and pt
< st
:
430 accu
[-1] = paste_tokens(t1
, to_add
[j
+1])
434 # invalid paste, case "##a" or "b##"
435 accu
.append((p2
, v2
))
437 elif p2
== IDENT
and v2
in arg_table
:
438 toks
= args
[arg_table
[v2
]]
439 reduce_tokens(toks
, defs
, ban
+[v
])
442 accu
.append((p2
, v2
))
447 reduce_tokens(accu
, defs
, ban
+[v
])
449 for x
in xrange(len(accu
)-1, -1, -1):
450 lst
.insert(i
, accu
[x
])
def eval_macro(lst, adefs):
	"""reduce the tokens from the list lst, and try to return a 0/1 result

	Fix: the fragment visible here computes (p, v) but never returns it;
	callers treat the result as a truth value (`if ret:`), so return whether
	the reduced value is non-zero.
	"""
	reduce_tokens(lst, adefs, [])
	if not lst:
		raise PreprocError("missing tokens to evaluate")
	(p, v) = reduce_eval(lst)
	# the #if condition holds when the evaluated expression is non-zero
	return int(v) != 0
462 def extract_macro(txt
):
463 """process a macro definition from "#define f(x, y) x * y" into a function or a simple macro without arguments"""
465 if re_fun
.search(txt
):
469 if p
!= OP
: raise PreprocError("expected open parenthesis")
485 elif p
== OP
and v
== ')':
488 raise PreprocError("unexpected token (3)")
490 if p
== OP
and v
== ',':
492 elif p
== OP
and v
== ')':
495 raise PreprocError("comma or ... expected")
501 elif p
== OP
and v
== '...':
502 raise PreprocError("not implemented (1)")
504 raise PreprocError("comma or ... expected (2)")
506 raise PreprocError("not implemented (2)")
508 raise PreprocError("unexpected else")
510 #~ print (name, [params, t[i+1:]])
511 return (name
, [params
, t
[i
+1:]])
514 return (v
, [[], t
[1:]])
516 re_include
= re
.compile('^\s*(<(?P<a>.*)>|"(?P<b>.*)")')
517 def extract_include(txt
, defs
):
518 """process a line in the form "#include foo" to return a string representing the file"""
519 m
= re_include
.search(txt
)
521 if m
.group('a'): return '<', m
.group('a')
522 if m
.group('b'): return '"', m
.group('b')
524 # perform preprocessing and look at the result, it must match an include
526 reduce_tokens(toks
, defs
, ['waf_include'])
529 raise PreprocError("could not parse include %s" % txt
)
532 if toks
[0][0] == STR
:
533 return '"', toks
[0][1]
535 if toks
[0][1] == '<' and toks
[-1][1] == '>':
536 return stringize(toks
).lstrip('<').rstrip('>')
538 raise PreprocError("could not parse include %s." % txt
)
541 if not txt
: raise PreprocError("attempted to parse a null char")
546 if len(txt
) == 4 and txt
[3] in string
.hexdigits
: return int(txt
[2:], 16)
547 return int(txt
[2:], 16)
549 if c
== '0' and len(txt
)==2: return 0
551 if len(txt
) > i
and txt
[1:1+i
].isdigit():
552 return (1+i
, int(txt
[1:1+i
], 8))
554 try: return chr_esc
[c
]
555 except KeyError: raise PreprocError("could not parse char literal '%s'" % txt
)
558 def tokenize_private(s
):
560 for match
in re_clexer
.finditer(s
):
562 for name
in tok_types
:
566 try: v
= g_optrans
[v
]; name
= OP
569 if v
.lower() == "true":
572 elif v
.lower() == "false":
576 if m('oct'): v
= int(v
, 8)
577 elif m('hex'): v
= int(m('hex'), 16)
578 elif m('n0'): v
= m('n0')
581 if v
: v
= parse_char(v
)
582 else: v
= m('n2') or m('n4')
584 if v
== '%:': v
= '#'
585 elif v
== '%:%:': v
= '##'
587 # remove the quotes around the string
589 ret
.append((name
, v
))
594 """convert a string into a list of tokens (shlex.split does not apply to c/c++/d)"""
595 return tokenize_private(s
)[:]
def define_name(line):
	"""Return the macro identifier at the start of a #define line body."""
	m = re_mac.match(line)
	# raises AttributeError when the line does not start with an identifier,
	# exactly as the chained original did; callers convert that to PreprocError
	return m.group(0)
601 class c_parser(object):
602 def __init__(self
, nodepaths
=None, defines
=None):
603 #self.lines = txt.split('\n')
609 self
.defs
= dict(defines
) # make a copy
612 self
.env
= None # needed for the variant when searching for files
615 self
.currentnode_stack
= []
617 self
.nodepaths
= nodepaths
or []
624 self
.ban_includes
= set([])
626 def cached_find_resource(self
, node
, filename
):
628 nd
= node
.bld
.cache_nd
630 nd
= node
.bld
.cache_nd
= {}
632 tup
= (node
.id, filename
)
636 ret
= node
.find_resource(filename
)
640 def tryfind(self
, filename
):
641 self
.curfile
= filename
643 # for msvc it should be a for loop on the whole stack
644 found
= self
.cached_find_resource(self
.currentnode_stack
[-1], filename
)
646 for n
in self
.nodepaths
:
649 found
= self
.cached_find_resource(n
, filename
)
652 self
.nodes
.append(found
)
653 if filename
[-4:] != '.moc':
656 if not filename
in self
.names
:
657 self
.names
.append(filename
)
660 def addlines(self
, node
):
662 self
.currentnode_stack
.append(node
.parent
)
663 filepath
= node
.abspath(self
.env
)
665 self
.count_files
+= 1
666 if self
.count_files
> recursion_limit
: raise PreprocError("recursion limit exceeded")
667 pc
= self
.parse_cache
668 debug('preproc: reading file %r', filepath
)
674 self
.lines
.extend(lns
)
678 lines
= filter_comments(filepath
)
679 lines
.append((POPFILE
, ''))
681 pc
[filepath
] = lines
# cache the lines filtered
682 self
.lines
.extend(lines
)
684 raise PreprocError("could not read the file %s" % filepath
)
687 error("parsing %s failed" % filepath
)
688 traceback
.print_exc()
690 def start(self
, node
, env
):
691 debug('preproc: scanning %s (in %s)', node
.name
, node
.parent
.name
)
694 variant
= node
.variant(env
)
695 bld
= node
.__class
__.bld
697 self
.parse_cache
= bld
.parse_cache
698 except AttributeError:
700 self
.parse_cache
= bld
.parse_cache
704 lst
= [('define', x
) for x
in env
['DEFLINES']]
706 self
.lines
.extend(lst
)
709 (kind
, line
) = self
.lines
.pop()
711 self
.currentnode_stack
.pop()
714 self
.process_line(kind
, line
)
717 debug('preproc: line parsing failed (%s): %s %s', e
, line
, Utils
.ex_stack())
719 def process_line(self
, token
, line
):
721 WARNING: a new state must be added for if* because the endif
724 if ve
: debug('preproc: line is %s - %s state is %s', token
, line
, self
.state
)
727 # make certain we define the state if we are about to enter in an if block
728 if token
in ['ifdef', 'ifndef', 'if']:
729 state
.append(undefined
)
730 elif token
== 'endif':
733 # skip lines when in a dead 'if' branch, wait for the endif
734 if not token
in ['else', 'elif', 'endif']:
735 if skipped
in self
.state
or ignored
in self
.state
:
739 ret
= eval_macro(tokenize(line
), self
.defs
)
740 if ret
: state
[-1] = accepted
741 else: state
[-1] = ignored
742 elif token
== 'ifdef':
743 m
= re_mac
.match(line
)
744 if m
and m
.group(0) in self
.defs
: state
[-1] = accepted
745 else: state
[-1] = ignored
746 elif token
== 'ifndef':
747 m
= re_mac
.match(line
)
748 if m
and m
.group(0) in self
.defs
: state
[-1] = ignored
749 else: state
[-1] = accepted
750 elif token
== 'include' or token
== 'import':
751 (kind
, inc
) = extract_include(line
, self
.defs
)
752 if inc
in self
.ban_includes
: return
753 if token
== 'import': self
.ban_includes
.add(inc
)
754 if ve
: debug('preproc: include found %s (%s) ', inc
, kind
)
755 if kind
== '"' or not strict_quotes
:
757 elif token
== 'elif':
758 if state
[-1] == accepted
:
760 elif state
[-1] == ignored
:
761 if eval_macro(tokenize(line
), self
.defs
):
763 elif token
== 'else':
764 if state
[-1] == accepted
: state
[-1] = skipped
765 elif state
[-1] == ignored
: state
[-1] = accepted
766 elif token
== 'define':
768 self
.defs
[define_name(line
)] = line
770 raise PreprocError("invalid define line %s" % line
)
771 elif token
== 'undef':
772 m
= re_mac
.match(line
)
773 if m
and m
.group(0) in self
.defs
:
774 self
.defs
.__delitem
__(m
.group(0))
775 #print "undef %s" % name
776 elif token
== 'pragma':
777 if re_pragma_once
.match(line
.lower()):
778 self
.ban_includes
.add(self
.curfile
)
def get_deps(node, env, nodepaths=None):
	"""
	Get the dependencies using a c/c++ preprocessor, this is required for finding dependencies of the kind
	#include some_macro()

	Returns a (nodes, names) tuple: resolved dependency nodes and the
	names that could not be resolved to files.

	Fix: replaced the mutable default argument `nodepaths=[]` with None
	(shared-list pitfall); behavior is unchanged since an empty/None value
	becomes [] before use.
	"""
	if nodepaths is None:
		nodepaths = []
	gruik = c_parser(nodepaths)
	gruik.start(node, env)
	return (gruik.nodes, gruik.names)
790 #################### dumb dependency scanner
792 re_inc
= re
.compile(\
793 '^[ \t]*(#|%:)[ \t]*(include)[ \t]*(.*)\r*$',
794 re
.IGNORECASE | re
.MULTILINE
)
def lines_includes(filename):
	"""Dumb scanner variant of filter_comments: return only the #include
	directives of *filename* as (keyword, line) tuples.

	Fix: same defect as filter_comments — `code.split(a).join(b)` calls
	`.join` on a list; the correct trigraph substitution is
	`b.join(code.split(a))`.
	"""
	code = Utils.readf(filename)
	# apply the trigraph rules first
	for (a, b) in trig_def:
		code = b.join(code.split(a))
	# remove backslash-newline line continuations
	code = re_nl.sub('', code)
	# strip comments while preserving string literals
	code = re_cpp.sub(repl, code)
	# re_inc only matches `include`, unlike re_lines which matches all directives
	return [(m.group(2), m.group(3)) for m in re.finditer(re_inc, code)]
804 def get_deps_simple(node
, env
, nodepaths
=[], defines
={}):
806 Get the dependencies by just looking recursively at the #include statements
813 lst
= lines_includes(node
.abspath(env
))
815 for (_
, line
) in lst
:
816 (t
, filename
) = extract_include(line
, defines
)
817 if filename
in names
:
820 if filename
.endswith('.moc'):
821 names
.append(filename
)
827 found
= n
.find_resource(filename
)
830 if not filename
in names
:
831 names
.append(filename
)
832 elif not found
in nodes
:
837 return (nodes
, names
)