#!/usr/bin/env python
# encoding: utf-8
# Thomas Nagy, 2006-2018 (ita)

"""
C/C++ preprocessor for finding dependencies

Reasons for using the Waf preprocessor by default

#. Some c/c++ extensions (Qt) require a custom preprocessor for obtaining the dependencies (.moc files)
#. Not all compilers provide .d files for obtaining the dependencies (portability)
#. A naive file scanner will not catch the constructs such as "#include foo()"
#. A naive file scanner will catch unnecessary dependencies (change an unused header -> recompile everything)

Regarding the speed concerns:

* the preprocessing is performed only when files must be compiled
* the macros are evaluated only for #if/#elif/#include
* system headers are not scanned by default

Now if you do not want the Waf preprocessor, the tool *gccdeps* uses the .d files produced
during the compilation to track the dependencies (useful when used with the boost libraries).
It only works with gcc >= 4.4 though.

A dumb preprocessor is also available in the tool *c_dumbpreproc*
"""
# TODO: more varargs, pragma once

import re, string, traceback
from waflib import Logs, Utils, Errors

class PreprocError(Errors.WafError):
	pass

FILE_CACHE_SIZE = 100000
LINE_CACHE_SIZE = 100000

POPFILE = '-'
"Constant representing a special token used in :py:meth:`waflib.Tools.c_preproc.c_parser.start` iteration to switch to a header read previously"

recursion_limit = 150
"Limit on the amount of files to read in the dependency scanner"

go_absolute = False
"Set to True to track headers on files in /usr/include, else absolute paths are ignored (but it becomes very slow)"

standard_includes = ['/usr/local/include', '/usr/include']
if Utils.is_win32:
	standard_includes = []

use_trigraphs = 0
"""Apply trigraph rules (False by default)"""

# obsolete, do not use
strict_quotes = 0

g_optrans = {
	'not':'!',
	'not_eq':'!=',
	'and':'&&',
	'and_eq':'&=',
	'or':'||',
	'or_eq':'|=',
	'xor':'^',
	'xor_eq':'^=',
	'bitand':'&',
	'bitor':'|',
	'compl':'~',
}
"""Operators such as and/or/xor for c++. Set an empty dict to disable."""

# ignore #warning and #error
re_lines = re.compile(
	'^[ \t]*(?:#|%:)[ \t]*(ifdef|ifndef|if|else|elif|endif|include|import|define|undef|pragma)[ \t]*(.*)\r*$',
	re.IGNORECASE | re.MULTILINE)
"""Match preprocessor directive lines (#if, #include, #define, ...)"""

re_mac = re.compile(r"^[a-zA-Z_]\w*")
"""Match macro definitions"""

re_fun = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*[(]')
"""Match macro functions"""

re_pragma_once = re.compile(r'^\s*once\s*', re.IGNORECASE)
"""Match #pragma once statements"""

re_nl = re.compile('\\\\\r*\n', re.MULTILINE)
"""Match line continuations (backslash followed by a newline)"""

re_cpp = re.compile(r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE)
"""Filter C/C++ comments"""

trig_def = [('??'+a, b) for a, b in zip("=-/!'()<>", r'#~\|^[]{}')]
"""Trigraph definitions"""

chr_esc = {'0':0, 'a':7, 'b':8, 't':9, 'n':10, 'f':12, 'v':11, 'r':13, '\\':92, "'":39}
"""Escape characters"""

NUM = 'i'
"""Number token"""

OP = 'O'
"""Operator token"""

IDENT = 'T'
"""Identifier token"""

STR = 's'
"""String token"""

CHAR = 'c'
"""Character token"""

tok_types = [NUM, STR, IDENT, OP]
"""Token types"""

exp_types = [
	r"""0[xX](?P<hex>[a-fA-F0-9]+)(?P<qual1>[uUlL]*)|L*?'(?P<char>(\\.|[^\\'])+)'|(?P<n1>\d+)[Ee](?P<exp0>[+-]*?\d+)(?P<float0>[fFlL]*)|(?P<n2>\d*\.\d+)([Ee](?P<exp1>[+-]*?\d+))?(?P<float1>[fFlL]*)|(?P<n4>\d+\.\d*)([Ee](?P<exp2>[+-]*?\d+))?(?P<float2>[fFlL]*)|(?P<oct>0*)(?P<n0>\d+)(?P<qual2>[uUlL]*)""",
	r'L?"([^"\\]|\\.)*"',
	r'[a-zA-Z_]\w*',
	r'%:%:|<<=|>>=|\.\.\.|<<|<%|<:|<=|>>|>=|\+\+|\+=|--|->|-=|\*=|/=|%:|%=|%>|==|&&|&=|\|\||\|=|\^=|:>|!=|##|[\(\)\{\}\[\]<>\?\|\^\*\+&=:!#;,%/\-\?\~\.]',
]
"""Expression types"""

re_clexer = re.compile('|'.join(["(?P<%s>%s)" % (name, part) for name, part in zip(tok_types, exp_types)]), re.M)
"""Match expressions into tokens"""

accepted = 'a'
"""Parser state is *accepted*"""

ignored = 'i'
"""Parser state is *ignored*, for example preprocessor lines in an #if 0 block"""

undefined = 'u'
"""Parser state is *undefined* at the moment"""

skipped = 's'
"""Parser state is *skipped*, for example preprocessor lines in a #elif 0 block"""

def repl(m):
	"""Replace function used with :py:attr:`waflib.Tools.c_preproc.re_cpp`"""
	s = m.group()
	if s[0] == '/':
		return ' '
	return s

prec = {}
"""
Operator precedence rules required for parsing expressions of the form::

	#if 1 && 2 != 0
"""
ops = ['* / %', '+ -', '<< >>', '< <= >= >', '== !=', '& | ^', '&& ||', ',']
for x, syms in enumerate(ops):
	for u in syms.split():
		prec[u] = x

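# A hedged sketch of the table built above: operators in earlier groups bind
# tighter, e.g. prec['*'] == 0, prec['+'] == 1, prec['&&'] == 6 and prec[','] == 7;
# get_term() below folds the operand pair whose operator has the smaller value first.
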
def reduce_nums(val_1, val_2, val_op):
	"""
	Apply arithmetic rules to compute a result

	:param val_1: input parameter
	:type val_1: int or string
	:param val_2: input parameter
	:type val_2: int or string
	:param val_op: C operator in *+*, */*, *-*, etc
	:type val_op: string
	:rtype: int
	"""
	# now perform the operation, make certain a and b are numeric
	try:
		a = 0 + val_1
	except TypeError:
		a = int(val_1)
	try:
		b = 0 + val_2
	except TypeError:
		b = int(val_2)

	d = val_op
	if d == '%':
		c = a % b
	elif d == '+':
		c = a + b
	elif d == '-':
		c = a - b
	elif d == '*':
		c = a * b
	elif d == '/':
		c = a // b # integer division as in C; the Python 3 '/' would produce a float
	elif d == '^':
		c = a ^ b
	elif d == '==':
		c = int(a == b)
	elif d == '|' or d == 'bitor':
		c = a | b
	elif d == '||' or d == 'or':
		c = int(a or b)
	elif d == '&' or d == 'bitand':
		c = a & b
	elif d == '&&' or d == 'and':
		c = int(a and b)
	elif d == '!=' or d == 'not_eq':
		c = int(a != b)
	elif d == 'xor':
		c = int(a ^ b)
	elif d == '<=':
		c = int(a <= b)
	elif d == '<':
		c = int(a < b)
	elif d == '>':
		c = int(a > b)
	elif d == '>=':
		c = int(a >= b)
	elif d == '<<':
		c = a << b
	elif d == '>>':
		c = a >> b
	else:
		c = 0
	return c

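# Hedged examples: operands may arrive as strings from the tokenizer and are
# coerced before computing, e.g.
#   reduce_nums('2', '3', '*')  ->  6
#   reduce_nums(1, 0, '&&')     ->  0
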
def get_num(lst):
	"""
	Try to obtain a number from a list of tokens. The token types are defined in :py:attr:`waflib.Tools.c_preproc.tok_types`.

	:param lst: list of preprocessor tokens
	:type lst: list of tuple (tokentype, value)
	:return: a pair containing the number and the rest of the list
	:rtype: tuple(value, list)
	"""
	if not lst:
		raise PreprocError('empty list for get_num')
	(p, v) = lst[0]
	if p == OP:
		if v == '(':
			count_par = 1
			i = 1
			while i < len(lst):
				(p, v) = lst[i]

				if p == OP:
					if v == ')':
						count_par -= 1
						if count_par == 0:
							break
					elif v == '(':
						count_par += 1
				i += 1
			else:
				raise PreprocError('rparen expected %r' % lst)

			(num, _) = get_term(lst[1:i])
			return (num, lst[i+1:])

		elif v == '+':
			return get_num(lst[1:])
		elif v == '-':
			num, lst = get_num(lst[1:])
			return (reduce_nums('-1', num, '*'), lst)
		elif v == '!':
			num, lst = get_num(lst[1:])
			return (int(not int(num)), lst)
		elif v == '~':
			num, lst = get_num(lst[1:])
			return (~ int(num), lst)
		else:
			raise PreprocError('Invalid op token %r for get_num' % lst)
	elif p == NUM:
		return v, lst[1:]
	elif p == IDENT:
		# all macros should have been replaced, remaining identifiers eval to 0
		return 0, lst[1:]
	else:
		raise PreprocError('Invalid token %r for get_num' % lst)

def get_term(lst):
	"""
	Evaluate an expression recursively, for example::

		1+1+1 -> 2+1 -> 3

	:param lst: list of tokens
	:type lst: list of tuple(token, value)
	:return: the value and the remaining tokens
	:rtype: value, list
	"""
	if not lst:
		raise PreprocError('empty list for get_term')
	num, lst = get_num(lst)
	if not lst:
		return (num, [])
	(p, v) = lst[0]
	if p == OP:
		if v == ',':
			# skip
			return get_term(lst[1:])
		elif v == '?':
			count_par = 0
			i = 1
			while i < len(lst):
				(p, v) = lst[i]

				if p == OP:
					if v == ')':
						count_par -= 1
					elif v == '(':
						count_par += 1
					elif v == ':':
						if count_par == 0:
							break
				i += 1
			else:
				raise PreprocError('rparen expected %r' % lst)

			if int(num):
				return get_term(lst[1:i])
			else:
				return get_term(lst[i+1:])

		else:
			num2, lst = get_num(lst[1:])

			if not lst:
				# no more tokens to process
				num2 = reduce_nums(num, num2, v)
				return get_term([(NUM, num2)] + lst)

			# operator precedence
			p2, v2 = lst[0]
			if p2 != OP:
				raise PreprocError('op expected %r' % lst)

			if prec[v2] >= prec[v]:
				num2 = reduce_nums(num, num2, v)
				return get_term([(NUM, num2)] + lst)
			else:
				num3, lst = get_num(lst[1:])
				num3 = reduce_nums(num2, num3, v2)
				return get_term([(NUM, num), (p, v), (NUM, num3)] + lst)

	raise PreprocError('cannot reduce %r' % lst)

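# A hedged example of the reduction, using tokenize() defined further below:
#   get_term(tokenize('1 + 2 * 3'))  ->  (7, [])
# The multiplication is folded first because prec['*'] < prec['+'].
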
def reduce_eval(lst):
	"""
	Take a list of tokens and output true or false for #if/#elif conditions.

	:param lst: a list of tokens
	:type lst: list of tuple(token, value)
	:return: a token
	:rtype: tuple(NUM, int)
	"""
	num, lst = get_term(lst)
	return (NUM, num)

def stringize(lst):
	"""
	Merge a list of tokens into a string

	:param lst: a list of tokens
	:type lst: list of tuple(token, value)
	:rtype: string
	"""
	lst = [str(v2) for (p2, v2) in lst]
	return "".join(lst)

def paste_tokens(t1, t2):
	"""
	Token pasting works between identifiers, particular operators, and identifiers and numbers::

		a ## b  ->  ab
		> ## =  ->  >=
		a ## 2  ->  a2

	:param t1: token
	:type t1: tuple(type, value)
	:param t2: token
	:type t2: tuple(type, value)
	"""
	p1 = None
	if t1[0] == OP and t2[0] == OP:
		p1 = OP
	elif t1[0] == IDENT and (t2[0] == IDENT or t2[0] == NUM):
		p1 = IDENT
	elif t1[0] == NUM and t2[0] == NUM:
		p1 = NUM
	if not p1:
		raise PreprocError('tokens do not make a valid paste %r and %r' % (t1, t2))
	return (p1, t1[1] + t2[1])

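# Hedged examples:
#   paste_tokens((IDENT, 'a'), (NUM, '2'))  ->  ('T', 'a2')
# while pasting, say, an operator onto an identifier raises PreprocError.
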
def reduce_tokens(lst, defs, ban=[]):
	"""
	Replace the tokens in lst, using the macros provided in defs, and a list of macros that cannot be re-applied

	:param lst: list of tokens
	:type lst: list of tuple(token, value)
	:param defs: macro definitions
	:type defs: dict
	:param ban: macros that cannot be substituted (recursion is not allowed)
	:type ban: list of string
	:return: None (the list of tokens is modified in place)
	"""
	i = 0
	while i < len(lst):
		(p, v) = lst[i]

		if p == IDENT and v == "defined":
			del lst[i]
			if i < len(lst):
				(p2, v2) = lst[i]
				if p2 == IDENT:
					if v2 in defs:
						lst[i] = (NUM, 1)
					else:
						lst[i] = (NUM, 0)
				elif p2 == OP and v2 == '(':
					del lst[i]
					(p2, v2) = lst[i]
					del lst[i] # remove the ident, and change the ) for the value
					if v2 in defs:
						lst[i] = (NUM, 1)
					else:
						lst[i] = (NUM, 0)
				else:
					raise PreprocError('Invalid define expression %r' % lst)

		elif p == IDENT and v in defs:

			if isinstance(defs[v], str):
				a, b = extract_macro(defs[v])
				defs[v] = b
			macro_def = defs[v]
			to_add = macro_def[1]

			if isinstance(macro_def[0], list):
				# macro without arguments
				del lst[i]
				accu = to_add[:]
				reduce_tokens(accu, defs, ban+[v])
				for tmp in accu:
					lst.insert(i, tmp)
					i += 1
			else:
				# collect the arguments for the funcall

				args = []
				del lst[i]

				if i >= len(lst):
					raise PreprocError('expected ( after %r (got nothing)' % v)

				(p2, v2) = lst[i]
				if p2 != OP or v2 != '(':
					raise PreprocError('expected ( after %r' % v)

				del lst[i]

				one_param = []
				count_paren = 0
				while i < len(lst):
					p2, v2 = lst[i]

					del lst[i]
					if p2 == OP and count_paren == 0:
						if v2 == '(':
							one_param.append((p2, v2))
							count_paren += 1
						elif v2 == ')':
							if one_param:
								args.append(one_param)
							break
						elif v2 == ',':
							if not one_param:
								raise PreprocError('empty param in funcall %r' % v)
							args.append(one_param)
							one_param = []
						else:
							one_param.append((p2, v2))
					else:
						one_param.append((p2, v2))
						if v2 == '(':
							count_paren += 1
						elif v2 == ')':
							count_paren -= 1
				else:
					raise PreprocError('malformed macro')

				# substitute the arguments within the define expression
				accu = []
				arg_table = macro_def[0]
				j = 0
				while j < len(to_add):
					(p2, v2) = to_add[j]

					if p2 == OP and v2 == '#':
						# stringize is for arguments only
						if j+1 < len(to_add) and to_add[j+1][0] == IDENT and to_add[j+1][1] in arg_table:
							toks = args[arg_table[to_add[j+1][1]]]
							accu.append((STR, stringize(toks)))
							j += 1
						else:
							accu.append((p2, v2))
					elif p2 == OP and v2 == '##':
						# token pasting, how can man invent such a complicated system?
						if accu and j+1 < len(to_add):
							# we have at least two tokens

							t1 = accu[-1]

							if to_add[j+1][0] == IDENT and to_add[j+1][1] in arg_table:
								toks = args[arg_table[to_add[j+1][1]]]

								if toks:
									accu[-1] = paste_tokens(t1, toks[0]) #(IDENT, accu[-1][1] + toks[0][1])
									accu.extend(toks[1:])
								else:
									# error, case "a##"
									accu.append((p2, v2))
									accu.extend(toks)
							elif to_add[j+1][0] == IDENT and to_add[j+1][1] == '__VA_ARGS__':
								# first collect the tokens
								va_toks = []
								st = len(macro_def[0])
								pt = len(args)
								for x in args[pt-st+1:]:
									va_toks.extend(x)
									va_toks.append((OP, ','))
								if va_toks:
									va_toks.pop() # extra comma
								if len(accu) > 1:
									(p3, v3) = accu[-1]
									(p4, v4) = accu[-2]
									if v3 == '##':
										# remove the token paste
										accu.pop()
										if v4 == ',' and pt < st:
											# remove the comma
											accu.pop()
								accu += va_toks
							else:
								accu[-1] = paste_tokens(t1, to_add[j+1])

							j += 1
						else:
							# Invalid paste, case "##a" or "b##"
							accu.append((p2, v2))

					elif p2 == IDENT and v2 in arg_table:
						toks = args[arg_table[v2]]
						reduce_tokens(toks, defs, ban+[v])
						accu.extend(toks)
					else:
						accu.append((p2, v2))

					j += 1

				reduce_tokens(accu, defs, ban+[v])

				for x in range(len(accu)-1, -1, -1):
					lst.insert(i, accu[x])

		i += 1

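# A hedged sketch of in-place expansion for an object-like macro, with
# extract_macro() and tokenize() defined further below:
#   defs = {}
#   name, val = extract_macro('A 2')
#   defs[name] = val
#   lst = tokenize('A + 1')
#   reduce_tokens(lst, defs, [])
#   # lst is now [('i', '2'), ('O', '+'), ('i', '1')]
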
def eval_macro(lst, defs):
	"""
	Reduce the tokens by :py:func:`waflib.Tools.c_preproc.reduce_tokens` and try to return a 0/1 result by :py:func:`waflib.Tools.c_preproc.reduce_eval`.

	:param lst: list of tokens
	:type lst: list of tuple(token, value)
	:param defs: macro definitions
	:type defs: dict
	:rtype: bool
	"""
	reduce_tokens(lst, defs, [])
	if not lst:
		raise PreprocError('missing tokens to evaluate')

	if lst:
		p, v = lst[0]
		if p == IDENT and v not in defs:
			raise PreprocError('missing macro %r' % lst)

	p, v = reduce_eval(lst)
	return int(v) != 0

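# Hedged examples matching the #if semantics:
#   eval_macro(tokenize('1 && 2 != 0'), {})                 ->  True
#   eval_macro(tokenize('defined(FOO)'), {'FOO': 'FOO 1'})  ->  True
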
def extract_macro(txt):
	"""
	Process a macro definition of the form::

		#define f(x, y) x * y

	into a function or a simple macro without arguments

	:param txt: expression to extract a macro definition from
	:type txt: string
	:return: a tuple containing the name, the list of arguments and the replacement
	:rtype: tuple(string, [list, list])
	"""
	t = tokenize(txt)
	if re_fun.search(txt):
		p, name = t[0]

		p, v = t[1]
		if p != OP:
			raise PreprocError('expected (')

		i = 1
		pindex = 0
		params = {}
		prev = '('

		while 1:
			i += 1
			p, v = t[i]

			if prev == '(':
				if p == IDENT:
					params[v] = pindex
					pindex += 1
					prev = p
				elif p == OP and v == ')':
					break
				else:
					raise PreprocError('unexpected token (3)')
			elif prev == IDENT:
				if p == OP and v == ',':
					prev = v
				elif p == OP and v == ')':
					break
				else:
					raise PreprocError('comma or ... expected')
			elif prev == ',':
				if p == IDENT:
					params[v] = pindex
					pindex += 1
					prev = p
				elif p == OP and v == '...':
					raise PreprocError('not implemented (1)')
				else:
					raise PreprocError('comma or ... expected (2)')
			elif prev == '...':
				raise PreprocError('not implemented (2)')
			else:
				raise PreprocError('unexpected else')

		#~ print (name, [params, t[i+1:]])
		return (name, [params, t[i+1:]])
	else:
		(p, v) = t[0]
		if len(t) > 1:
			return (v, [[], t[1:]])
		else:
			# empty define, assign an empty token
			return (v, [[], [('T','')]])

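# Hedged examples of both forms:
#   extract_macro('f(x, y) x * y')  ->  ('f', [{'x': 0, 'y': 1}, [('T', 'x'), ('O', '*'), ('T', 'y')]])
#   extract_macro('DEBUG')          ->  ('DEBUG', [[], [('T', '')]])
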
re_include = re.compile(r'^\s*(<(?:.*)>|"(?:.*)")')
def extract_include(txt, defs):
	"""
	Process a line in the form::

		#include foo

	:param txt: include line to process
	:type txt: string
	:param defs: macro definitions
	:type defs: dict
	:return: the file name
	:rtype: string
	"""
	m = re_include.search(txt)
	if m:
		txt = m.group(1)
		return txt[0], txt[1:-1]

	# perform preprocessing and look at the result, it must match an include
	toks = tokenize(txt)
	reduce_tokens(toks, defs, ['waf_include'])

	if not toks:
		raise PreprocError('could not parse include %r' % txt)

	if len(toks) == 1:
		if toks[0][0] == STR:
			return '"', toks[0][1]
	else:
		if toks[0][1] == '<' and toks[-1][1] == '>':
			ret = '<', stringize(toks).lstrip('<').rstrip('>')
			return ret

	raise PreprocError('could not parse include %r' % txt)

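# Hedged examples; txt is what follows the #include keyword:
#   extract_include('<stdio.h>', {})    ->  ('<', 'stdio.h')
#   extract_include('"config.h"', {})   ->  ('"', 'config.h')
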
def parse_char(txt):
	"""
	Parse a c character

	:param txt: character to parse
	:type txt: string
	:return: the character value (ordinal)
	:rtype: int
	"""
	if not txt:
		raise PreprocError('attempted to parse a null char')
	if txt[0] != '\\':
		return ord(txt)
	c = txt[1]
	if c == 'x':
		return int(txt[2:], 16)
	elif c.isdigit():
		if c == '0' and len(txt) == 2:
			return 0
		for i in 3, 2, 1:
			if len(txt) > i and txt[1:1+i].isdigit():
				return (1+i, int(txt[1:1+i], 8))
	else:
		try:
			return chr_esc[c]
		except KeyError:
			raise PreprocError('could not parse char literal %r' % txt)

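# Hedged examples (txt is the body of a character literal, backslash included):
#   parse_char('A')     ->  65
#   parse_char(r'\n')   ->  10
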
def tokenize(s):
	"""
	Convert a string into a list of tokens (shlex.split does not apply to c/c++/d)

	:param s: input to tokenize
	:type s: string
	:return: a list of tokens
	:rtype: list of tuple(token, value)
	"""
	return tokenize_private(s)[:] # force a copy of the results

def tokenize_private(s):
	ret = []
	for match in re_clexer.finditer(s):
		m = match.group
		for name in tok_types:
			v = m(name)
			if v:
				if name == IDENT:
					if v in g_optrans:
						name = OP
					elif v.lower() == "true":
						v = 1
						name = NUM
					elif v.lower() == "false":
						v = 0
						name = NUM
				elif name == NUM:
					if m('oct'):
						v = int(v, 8)
					elif m('hex'):
						v = int(m('hex'), 16)
					elif m('n0'):
						v = m('n0')
					else:
						v = m('char')
						if v:
							v = parse_char(v)
						else:
							v = m('n2') or m('n4')
				elif name == OP:
					if v == '%:':
						v = '#'
					elif v == '%:%:':
						v = '##'
				elif name == STR:
					# remove the quotes around the string
					v = v[1:-1]
				ret.append((name, v))
				break
	return ret

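# A hedged example of the token stream produced:
#   tokenize('defined(FOO) && BAR > 1')
#   -> [('T', 'defined'), ('O', '('), ('T', 'FOO'), ('O', ')'),
#       ('O', '&&'), ('T', 'BAR'), ('O', '>'), ('i', '1')]
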
def format_defines(lst):
	ret = []
	for y in lst:
		if y:
			pos = y.find('=')
			if pos == -1:
				# "-DFOO" should give "#define FOO 1"
				ret.append(y)
			elif pos > 0:
				# all others are assumed to be -DX=Y
				ret.append('%s %s' % (y[:pos], y[pos+1:]))
			else:
				raise ValueError('Invalid define expression %r' % y)
	return ret

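# A hedged example, mirroring -DFOO and -DBAR=1 on a compiler command line:
#   format_defines(['FOO', 'BAR=1'])  ->  ['FOO', 'BAR 1']
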
class c_parser(object):
	"""
	Used by :py:func:`waflib.Tools.c_preproc.scan` to parse c/h files. Note that by default,
	only project headers are parsed.
	"""
	def __init__(self, nodepaths=None, defines=None):
		self.lines = []
		"""list of lines read"""

		if defines is None:
			self.defs = {}
		else:
			self.defs = dict(defines) # make a copy
		self.state = []

		self.count_files = 0
		self.currentnode_stack = []

		self.nodepaths = nodepaths or []
		"""Include paths"""

		self.nodes = []
		"""List of :py:class:`waflib.Node.Node` found so far"""

		self.names = []
		"""List of file names that could not be matched by any file"""

		self.curfile = ''
		"""Current file"""

		self.ban_includes = set()
		"""Includes that must not be read (#pragma once)"""

		self.listed = set()
		"""Include nodes/names already listed to avoid duplicates in self.nodes/self.names"""

	def cached_find_resource(self, node, filename):
		"""
		Find a file from the input directory

		:param node: directory
		:type node: :py:class:`waflib.Node.Node`
		:param filename: header to find
		:type filename: string
		:return: the node if found, or None
		:rtype: :py:class:`waflib.Node.Node`
		"""
		try:
			cache = node.ctx.preproc_cache_node
		except AttributeError:
			cache = node.ctx.preproc_cache_node = Utils.lru_cache(FILE_CACHE_SIZE)

		key = (node, filename)
		try:
			return cache[key]
		except KeyError:
			ret = node.find_resource(filename)
			if ret:
				if getattr(ret, 'children', None):
					ret = None
				elif ret.is_child_of(node.ctx.bldnode):
					tmp = node.ctx.srcnode.search_node(ret.path_from(node.ctx.bldnode))
					if tmp and getattr(tmp, 'children', None):
						ret = None
			cache[key] = ret
			return ret

	def tryfind(self, filename, kind='"', env=None):
		"""
		Try to obtain a node from the filename based on the include paths. Will add
		the node found to :py:attr:`waflib.Tools.c_preproc.c_parser.nodes` or the file name to
		:py:attr:`waflib.Tools.c_preproc.c_parser.names` if no corresponding file is found. Called by
		:py:attr:`waflib.Tools.c_preproc.c_parser.start`.

		:param filename: header to find
		:type filename: string
		:return: the node if found
		:rtype: :py:class:`waflib.Node.Node`
		"""
		if filename.endswith('.moc'):
			# we could let the qt4 module use a subclass, but then the function "scan" below must be duplicated
			# in the qt4 and in the qt5 classes. So we have two lines here and it is sufficient.
			self.names.append(filename)
			return None

		self.curfile = filename

		found = None
		if kind == '"':
			if env.MSVC_VERSION:
				for n in reversed(self.currentnode_stack):
					found = self.cached_find_resource(n, filename)
					if found:
						break
			else:
				found = self.cached_find_resource(self.currentnode_stack[-1], filename)

		if not found:
			for n in self.nodepaths:
				found = self.cached_find_resource(n, filename)
				if found:
					break

		listed = self.listed
		if found and found not in self.ban_includes:
			if found not in listed:
				listed.add(found)
				self.nodes.append(found)
			self.addlines(found)
		else:
			if filename not in listed:
				listed.add(filename)
				self.names.append(filename)
		return found

	def filter_comments(self, node):
		"""
		Filter the comments from a c/h file, and return the preprocessor lines.
		The regexps :py:attr:`waflib.Tools.c_preproc.re_cpp`, :py:attr:`waflib.Tools.c_preproc.re_nl` and :py:attr:`waflib.Tools.c_preproc.re_lines` are used internally.

		:return: the preprocessor directives as a list of (keyword, line)
		:rtype: a list of string pairs
		"""
		# return a list of tuples : keyword, line
		code = node.read()
		if use_trigraphs:
			for (a, b) in trig_def:
				code = b.join(code.split(a))
		code = re_nl.sub('', code)
		code = re_cpp.sub(repl, code)
		return re_lines.findall(code)

	def parse_lines(self, node):
		try:
			cache = node.ctx.preproc_cache_lines
		except AttributeError:
			cache = node.ctx.preproc_cache_lines = Utils.lru_cache(LINE_CACHE_SIZE)
		try:
			return cache[node]
		except KeyError:
			cache[node] = lines = self.filter_comments(node)
			lines.append((POPFILE, ''))
			lines.reverse()
			return lines

	def addlines(self, node):
		"""
		Add the lines from a header in the list of preprocessor lines to parse

		:param node: header
		:type node: :py:class:`waflib.Node.Node`
		"""
		self.currentnode_stack.append(node.parent)

		self.count_files += 1
		if self.count_files > recursion_limit:
			# issue #812
			raise PreprocError('recursion limit exceeded')

		if Logs.verbose:
			Logs.debug('preproc: reading file %r', node)
		try:
			lines = self.parse_lines(node)
		except EnvironmentError:
			raise PreprocError('could not read the file %r' % node)
		except Exception:
			if Logs.verbose > 0:
				Logs.error('parsing %r failed %s', node, traceback.format_exc())
		else:
			self.lines.extend(lines)

	def start(self, node, env):
		"""
		Preprocess a source file to obtain the dependencies, which are accumulated to :py:attr:`waflib.Tools.c_preproc.c_parser.nodes`
		and :py:attr:`waflib.Tools.c_preproc.c_parser.names`.

		:param node: source file
		:type node: :py:class:`waflib.Node.Node`
		:param env: config set containing additional defines to take into account
		:type env: :py:class:`waflib.ConfigSet.ConfigSet`
		"""
		Logs.debug('preproc: scanning %s (in %s)', node.name, node.parent.name)

		self.current_file = node
		self.addlines(node)

		# macros may be defined on the command-line, so they must be parsed as if they were part of the file
		if env.DEFINES:
			lst = format_defines(env.DEFINES)
			lst.reverse()
			self.lines.extend([('define', x) for x in lst])

		while self.lines:
			(token, line) = self.lines.pop()
			if token == POPFILE:
				self.count_files -= 1
				self.currentnode_stack.pop()
				continue

			try:
				state = self.state

				# make certain we define the state if we are about to enter in an if block
				if token[:2] == 'if':
					state.append(undefined)
				elif token == 'endif':
					state.pop()

				# skip lines when in a dead 'if' branch, wait for the endif
				if token[0] != 'e':
					if skipped in self.state or ignored in self.state:
						continue

				if token == 'if':
					ret = eval_macro(tokenize(line), self.defs)
					if ret:
						state[-1] = accepted
					else:
						state[-1] = ignored
				elif token == 'ifdef':
					m = re_mac.match(line)
					if m and m.group() in self.defs:
						state[-1] = accepted
					else:
						state[-1] = ignored
				elif token == 'ifndef':
					m = re_mac.match(line)
					if m and m.group() in self.defs:
						state[-1] = ignored
					else:
						state[-1] = accepted
				elif token == 'include' or token == 'import':
					(kind, inc) = extract_include(line, self.defs)
					self.current_file = self.tryfind(inc, kind, env)
					if token == 'import':
						self.ban_includes.add(self.current_file)
				elif token == 'elif':
					if state[-1] == accepted:
						state[-1] = skipped
					elif state[-1] == ignored:
						if eval_macro(tokenize(line), self.defs):
							state[-1] = accepted
				elif token == 'else':
					if state[-1] == accepted:
						state[-1] = skipped
					elif state[-1] == ignored:
						state[-1] = accepted
				elif token == 'define':
					try:
						self.defs[self.define_name(line)] = line
					except AttributeError:
						raise PreprocError('Invalid define line %r' % line)
				elif token == 'undef':
					m = re_mac.match(line)
					if m and m.group() in self.defs:
						del self.defs[m.group()]
				elif token == 'pragma':
					if re_pragma_once.match(line.lower()):
						self.ban_includes.add(self.current_file)
			except Exception as e:
				if Logs.verbose:
					Logs.debug('preproc: line parsing failed (%s): %s %s', e, line, traceback.format_exc())

	def define_name(self, line):
		"""
		:param line: define line
		:type line: string
		:rtype: string
		:return: the define name
		"""
		return re_mac.match(line).group()

def scan(task):
	"""
	Get the dependencies using a c/c++ preprocessor, this is required for finding dependencies of the kind::

		#include some_macro()

	This function is bound as a task method on :py:class:`waflib.Tools.c.c` and :py:class:`waflib.Tools.cxx.cxx` for example
	"""
	try:
		incn = task.generator.includes_nodes
	except AttributeError:
		raise Errors.WafError('%r is missing a feature such as "c", "cxx" or "includes"' % task.generator)

	if go_absolute:
		nodepaths = incn + [task.generator.bld.root.find_dir(x) for x in standard_includes]
	else:
		nodepaths = [x for x in incn if x.is_child_of(x.ctx.srcnode) or x.is_child_of(x.ctx.bldnode)]

	tmp = c_parser(nodepaths)
	tmp.start(task.inputs[0], task.env)
	return (tmp.nodes, tmp.names)
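
# A hedged usage sketch for driving the parser directly; "bld" stands for a
# configured waf build context, which this module does not provide itself:
#   parser = c_parser(nodepaths=[bld.path.find_dir('include')])
#   parser.start(bld.path.find_node('src/main.c'), bld.env)
#   parser.nodes  # headers resolved to waflib.Node.Node objects
#   parser.names  # include names that could not be resolved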