src/gromacs/gmxlib/nonbonded/preprocessor/gmxpreprocess.py

   1 #!/usr/bin/env python
   2 # Copyright (c) 2002-2008 ActiveState Software Inc.
   3 # License: MIT License (http://www.opensource.org/licenses/mit-license.php)
   4 # Original filename preprocess.py, see http://code.google.com/p/preprocess/
   5 #
   6 # Modified by Erik Lindahl 2009-2012 <lindahl@gromacs.org>
   7 # to enable advanced preprocessing for Gromacs kernels, including
   8 # preprocessor for-loops and substitution into preprocessor directives
   9 # as well as program strings.
  10 #
  11 # Please feel free to redistribute under same license as original (MIT),
  12 # but don't blame the original authors for mistakes in this version.
  13 #
  14
  15 """
  16     Preprocess a file.
  17
  18     Command Line Usage:
  19         gmxpreprocess [<options>...] <infile>
  20
  21     Options:
  22         -h, --help      Print this help and exit.
  23         -V, --version   Print the version info and exit.
  24         -v, --verbose   Give verbose output for errors.
  25
  26         -o <outfile>    Write output to the given file instead of to stdout.
  27         -f, --force     Overwrite given output file. (Otherwise an IOError
  28                         will be raised if <outfile> already exists.)
  29         -D <define>     Define a variable for preprocessing. <define>
  30                         can simply be a variable name (in which case it
  31                         will be true) or it can be of the form
  32                         <var>=<val>. An attempt will be made to convert
  33                         <val> to an integer so "-D FOO=0" will create a
  34                         false value.
  35         -I <dir>        Add a directory to the include path for
  36                         #include directives.
  37
  38         -k, --keep-lines    Emit empty lines for preprocessor statement
  39                         lines and skipped output lines. This allows line
  40                         numbers to stay constant.
  41         -s, --no-substitute  Do NOT Substitute defines into emitted lines.
  42         -c, --content-types-path <path>
  43                         Specify a path to a content.types file to assist
  44                         with filetype determination. See the
  45                         `_gDefaultContentTypes` string in this file for
  46                         details on its format.
  47
  48     Module Usage:
  49         from gmxpreprocess import gmxpreprocess
  50         gmxpreprocess(infile, outfile=sys.stdout, defines={}, force=0,
  51                       keepLines=0, includePath=[], substitute=1,
  52                       contentType=None)
  53
  54     The <infile> can be marked up with special preprocessor statement lines
  55     of the form:
  56         <comment-prefix> <preprocessor-statement> <comment-suffix>
  57     where the <comment-prefix/suffix> are the native comment delimiters for
  58     that file type.
  59
  60
  61     Examples
  62     --------
  63
  64     HTML (*.htm, *.html) or XML (*.xml, *.kpf, *.xul) files:
  65
  66         <!-- #if FOO -->
  67         ...
  68         <!-- #endif -->
  69
  70     Python (*.py), Perl (*.pl), Tcl (*.tcl), Ruby (*.rb), Bash (*.sh),
  71     or make ([Mm]akefile*) files:
  72
  73         # #if defined('FAV_COLOR') and FAV_COLOR == "blue"
  74         ...
  75         # #elif FAV_COLOR == "red"
  76         ...
  77         # #else
  78         ...
  79         # #endif
  80
  81     C (*.c, *.h), C++ (*.cpp, *.cxx, *.cc, *.h, *.hpp, *.hxx, *.hh),
  82     Java (*.java), PHP (*.php) or C# (*.cs) files:
  83
  84         // #define FAV_COLOR 'blue'
  85         ...
  86         /* #ifndef FAV_COLOR */
  87         ...
  88         // #endif
  89
  90     Fortran 77 (*.f) or 90/95 (*.f90) files:
  91
  92         C     #if COEFF == 'var'
  93               ...
  94         C     #endif
  95
  96     And other languages.
  97
  98
  99     Preprocessor Syntax
 100     -------------------
 101
 102     - Valid statements:
 103         #define <var> [<value>]
 104         #undef <var>
 105         #ifdef <var>
 106         #ifndef <var>
 107         #if <expr>
 108         #elif <expr>
 109         #else
 110         #endif
 111         #error <error string>
 112         #include "<file>"
 113         #include <var>
 114       where <expr> is any valid Python expression.
 115     - The expression after #if/elif may be a Python statement. It is an
 116       error to refer to a variable that has not been defined by a -D
 117       option or by an in-content #define.
 118     - Special built-in methods for expressions:
 119         defined(varName)    Return true if given variable is defined.
 120
 121
 122     Tips
 123     ----
 124
 125     A suggested file naming convention is to let input files to
 126     preprocess be of the form <basename>.p.<ext> and direct the output
 127     of preprocess to <basename>.<ext>, e.g.:
 128         preprocess -o foo.py foo.p.py
 129     The advantage is that other tools (esp. editors) will still
 130     recognize the unpreprocessed file as the original language.
 131 """
 132
 133 __version_info__ = (1, 1, 0)
 134 __version__ = '.'.join(map(str, __version_info__))
 135
 136 import os
 137 import sys
 138 import getopt
 139 import types
 140 import re
 141 import pprint
 142
 143
 144
 145 #---- exceptions
 146
 147 class PreprocessError(Exception):
 148     def __init__(self, errmsg, file=None, lineno=None, line=None):
 149         self.errmsg = str(errmsg)
 150         self.file = file
 151         self.lineno = lineno
 152         self.line = line
 153         Exception.__init__(self, errmsg, file, lineno, line)
 154     def __str__(self):
 155         s = ""
 156         if self.file is not None:
 157             s += self.file + ":"
 158         if self.lineno is not None:
 159             s += str(self.lineno) + ":"
 160         if self.file is not None or self.lineno is not None:
 161             s += " "
 162         s += self.errmsg
 163         #if self.line is not None:
 164         #    s += ": " + self.line
 165         return s
 166
 167
 168
 169 #---- global data
 170
 171 # Comment delimiter info.
 172 #   A mapping of content type to a list of 2-tuples defining the line
 173 #   prefix and suffix for a comment. Each prefix or suffix can either
 174 #   be a string (in which case it is transformed into a pattern allowing
 175 #   whitespace on either side) or a compiled regex.
 176 _commentGroups = {
 177     "Python":     [ ('#', '') ],
 178     "Perl":       [ ('#', '') ],
 179     "PHP":        [ ('/*', '*/'), ('//', ''), ('#', '') ],
 180     "Ruby":       [ ('#', '') ],
 181     "Tcl":        [ ('#', '') ],
 182     "Shell":      [ ('#', '') ],
 183     # Allowing for CSS and JavaScript comments in XML/HTML.
 184     "XML":        [ ('<!--', '-->'), ('/*', '*/'), ('//', '') ],
 185     "HTML":       [ ('<!--', '-->'), ('/*', '*/'), ('//', '') ],
 186     "Makefile":   [ ('#', '') ],
 187     "JavaScript": [ ('/*', '*/'), ('//', '') ],
 188     "CSS":        [ ('/*', '*/') ],
 189     "C":          [ ('/*', '*/') ],
 190     "C++":        [ ('/*', '*/'), ('//', '') ],
 191     "Java":       [ ('/*', '*/'), ('//', '') ],
 192     "C#":         [ ('/*', '*/'), ('//', '') ],
 193     "IDL":        [ ('/*', '*/'), ('//', '') ],
 194     "Text":       [ ('#', '') ],
 195     "Fortran":    [ (re.compile(r'^[a-zA-Z*$]\s*'), ''), ('!', '') ],
 196     "TeX":        [ ('%', '') ],
 197 }
 198
 199
 200
 201 #---- internal logging facility
 202
 203 class _Logger:
 204     DEBUG, INFO, WARN, ERROR, CRITICAL = range(5)
 205     def __init__(self, name, level=None, streamOrFileName=sys.stderr):
 206         self._name = name
 207         if level is None:
 208             self.level = self.WARN
 209         else:
 210             self.level = level
 211         if type(streamOrFileName) == types.StringType:
 212             self.stream = open(streamOrFileName, 'w')
 213             self._opennedStream = 1
 214         else:
 215             self.stream = streamOrFileName
 216             self._opennedStream = 0
 217     def __del__(self):
 218         if self._opennedStream:
 219             self.stream.close()
 220     def getLevel(self):
 221         return self.level
 222     def setLevel(self, level):
 223         self.level = level
 224     def _getLevelName(self, level):
 225         levelNameMap = {
 226             self.DEBUG: "DEBUG",
 227             self.INFO: "INFO",
 228             self.WARN: "WARN",
 229             self.ERROR: "ERROR",
 230             self.CRITICAL: "CRITICAL",
 231         }
 232         return levelNameMap[level]
 233     def isEnabled(self, level):
 234         return level >= self.level
 235     def isDebugEnabled(self): return self.isEnabled(self.DEBUG)
 236     def isInfoEnabled(self): return self.isEnabled(self.INFO)
 237     def isWarnEnabled(self): return self.isEnabled(self.WARN)
 238     def isErrorEnabled(self): return self.isEnabled(self.ERROR)
 239     def isFatalEnabled(self): return self.isEnabled(self.FATAL)
 240     def log(self, level, msg, *args):
 241         if level < self.level:
 242             return
 243         message = "%s: %s: " % (self._name, self._getLevelName(level).lower())
 244         message = message + (msg % args) + "\n"
 245         self.stream.write(message)
 246         self.stream.flush()
 247     def debug(self, msg, *args):
 248         self.log(self.DEBUG, msg, *args)
 249     def info(self, msg, *args):
 250         self.log(self.INFO, msg, *args)
 251     def warn(self, msg, *args):
 252         self.log(self.WARN, msg, *args)
 253     def error(self, msg, *args):
 254         self.log(self.ERROR, msg, *args)
 255     def fatal(self, msg, *args):
 256         self.log(self.CRITICAL, msg, *args)
 257
 258 log = _Logger("gmxpreprocess", _Logger.WARN)
 259
 260
 261
 262 #---- internal support stuff
 263
 264 def SubstituteInternal(expr, defines):
 265     prevexpr = ''
 266     while (expr!=prevexpr):
 267         prevexpr=expr
 268         for name in reversed(sorted(defines, key=len)):
 269             value = defines[name]
 270             expr = expr.replace(name, str(value))
 271     return expr
 272
 273 def SubstituteInCode(expr, defines):
 274     prevexpr = ''
 275     while (expr!=prevexpr):
 276         prevexpr=expr
 277         for name in reversed(sorted(defines, key=len)):
 278             value = defines[name]
 279             expr = expr.replace('{' + name + '}', str(value))
 280     return expr
 281
 282
 283 def _evaluate(expr, defines):
 284     """Evaluate the given expression string with the given context.
 285
 286     WARNING: This runs eval() on a user string. This is unsafe.
 287     """
 288     #interpolated = _interpolate(s, defines)
 289
 290     try:
 291         rv = eval(expr, {'defined':lambda v: v in defines}, defines)
 292     except Exception, ex:
 293         msg = str(ex)
 294         if msg.startswith("name '") and msg.endswith("' is not defined"):
 295             # A common error (at least this is presumed:) is to have
 296             #   defined(FOO)   instead of   defined('FOO')
 297             # We should give a little as to what might be wrong.
 298             # msg == "name 'FOO' is not defined"  -->  varName == "FOO"
 299             varName = msg[len("name '"):-len("' is not defined")]
 300             if expr.find("defined(%s)" % varName) != -1:
 301                 # "defined(FOO)" in expr instead of "defined('FOO')"
 302                 msg += " (perhaps you want \"defined('%s')\" instead of "\
 303                        "\"defined(%s)\")" % (varName, varName)
 304         elif msg.startswith("invalid syntax"):
 305             msg = "invalid syntax: '%s'" % expr
 306         raise PreprocessError(msg, defines['__FILE__'], defines['__LINE__'])
 307     log.debug("evaluate %r -> %s (defines=%r)", expr, rv, defines)
 308
 309     return rv
 310
 311 #---- module API
 312
 313 def gmxpreprocess(infile, outfile=sys.stdout, defines={},
 314                force=0, keepLines=0, includePath=[], substitute=1,
 315                contentType=None, contentTypesRegistry=None,
 316                __preprocessedFiles=None):
 317     """Preprocess the given file.
 318
 319     "infile" is the input path.
 320     "outfile" is the output path or stream (default is sys.stdout).
 321     "defines" is a dictionary of defined variables that will be
 322         understood in preprocessor statements. Keys must be strings and,
 323         currently, only the truth value of any key's value matters.
 324     "force" will overwrite the given outfile if it already exists. Otherwise
 325         an IOError will be raise if the outfile already exists.
 326     "keepLines" will cause blank lines to be emitted for preprocessor lines
 327         and content lines that would otherwise be skipped.
 328     "includePath" is a list of directories to search for given #include
 329         directives. The directory of the file being processed is presumed.
 330     "substitute", if true, will allow substitution of defines into emitted
 331         lines. (NOTE: This substitution will happen within program strings
 332         as well. This may not be what you expect.)
 333     "contentType" can be used to specify the content type of the input
 334         file. It not given, it will be guessed.
 335     "contentTypesRegistry" is an instance of ContentTypesRegistry. If not specified
 336         a default registry will be created.
 337     "__preprocessedFiles" (for internal use only) is used to ensure files
 338         are not recusively preprocessed.
 339
 340     Returns the modified dictionary of defines or raises PreprocessError if
 341     there was some problem.
 342     """
 343     if __preprocessedFiles is None:
 344         __preprocessedFiles = []
 345     log.info("preprocess(infile=%r, outfile=%r, defines=%r, force=%r, "\
 346              "keepLines=%r, includePath=%r, contentType=%r, "\
 347              "__preprocessedFiles=%r)", infile, outfile, defines, force,
 348              keepLines, includePath, contentType, __preprocessedFiles)
 349     absInfile = os.path.normpath(os.path.abspath(infile))
 350     if absInfile in __preprocessedFiles:
 351         raise PreprocessError("detected recursive #include of '%s'"\
 352                               % infile)
 353     __preprocessedFiles.append(os.path.abspath(infile))
 354
 355     # Determine the content type and comment info for the input file.
 356     if contentType is None:
 357         registry = contentTypesRegistry or getDefaultContentTypesRegistry()
 358         contentType = registry.getContentType(infile)
 359         if contentType is None:
 360             contentType = "Text"
 361             log.warn("defaulting content type for '%s' to '%s'",
 362                      infile, contentType)
 363     try:
 364         cgs = _commentGroups[contentType]
 365     except KeyError:
 366         raise PreprocessError("don't know comment delimiters for content "\
 367                               "type '%s' (file '%s')"\
 368                               % (contentType, infile))
 369
 370     # Generate statement parsing regexes. Basic format:
 371     #       <comment-prefix> <preprocessor-stmt> <comment-suffix>
 372     #  Examples:
 373     #       <!-- #if foo -->
 374     #       ...
 375     #       <!-- #endif -->
 376     #
 377     #       # #if BAR
 378     #       ...
 379     #       # #else
 380     #       ...
 381     #       # #endif
 382     stmts = ['##\s*(?P<op>.*?)',
 383              '#\s*(?P<op>if|elif|ifdef|ifndef)\s+(?P<expr>.*?)',
 384              '#\s*(?P<op>else|endif)',
 385              '#\s*(?P<op>error)\s+(?P<error>.*?)',
 386              '#\s*(?P<op>define)\s+(?P<var>[^\s]*?)(\s+(?P<val>.+?))?',
 387              '#\s*(?P<op>undef)\s+(?P<var>[^\s]*?)',
 388              '#\s*(?P<op>for)\s+(?P<var>.*?)\s+((in)|(IN))\s+(?P<valuelist>.*?)',
 389              '#\s*(?P<op>endfor)',
 390              '#\s*(?P<op>include)\s+"(?P<fname>.*?)"',
 391              r'#\s*(?P<op>include)\s+(?P<var>[^\s]+?)',
 392             ]
 393     patterns = []
 394     for stmt in stmts:
 395         # The comment group prefix and suffix can either be just a
 396         # string or a compiled regex.
 397         for cprefix, csuffix in cgs:
 398             if hasattr(cprefix, "pattern"):
 399                 pattern = cprefix.pattern
 400             else:
 401                 pattern = r"^\s*%s\s*" % re.escape(cprefix)
 402             pattern += stmt
 403             if hasattr(csuffix, "pattern"):
 404                 pattern += csuffix.pattern
 405             else:
 406                 pattern += r"\s*%s\s*$" % re.escape(csuffix)
 407             patterns.append(pattern)
 408     stmtRes = [re.compile(p) for p in patterns]
 409
 410     # Process the input file.
 411     # (Would be helpful if I knew anything about lexing and parsing
 412     # simple grammars.)
 413     fin = open(infile, 'r')
 414     lines = fin.readlines()
 415     # Merge multi-line comments
 416     for i in range(len(lines)-1,-1,-1):
 417         line = lines[i].rstrip(' \r\n')
 418         if len(line)>0 and line[-1]=='\\':
 419             lines[i] = line[:-1] + ' ' + lines[i+1]
 420             lines[i+1] = ''    # keep an empty line to avoid screwing up line numbers
 421
 422     fin.close()
 423     if type(outfile) in types.StringTypes:
 424         if force and os.path.exists(outfile):
 425             os.chmod(outfile, 0777)
 426             os.remove(outfile)
 427         fout = open(outfile, 'w')
 428     else:
 429         fout = outfile
 430
 431     defines['__FILE__'] = infile
 432     SKIP, EMIT = range(2) # states
 433     states = [(EMIT,   # a state is (<emit-or-skip-lines-in-this-section>,
 434                0,      #             <have-emitted-in-this-if-block>,
 435                0)]     #             <have-seen-'else'-in-this-if-block>)
 436     lineNum = 0
 437     nlines = len(lines)
 438     forlevel = 0
 439     forvar = {}
 440     forvaluelist = {}
 441     forstartline = {}
 442     foriteration = {}
 443     last_emitted_was_blank = True
 444
 445     while lineNum<nlines:
 446
 447         line = lines[lineNum]
 448
 449         log.debug("line %d: %r", lineNum+1, line)
 450         defines['__LINE__'] = lineNum+1
 451
 452         # Is this line a preprocessor stmt line?
 453         #XXX Could probably speed this up by optimizing common case of
 454         #    line NOT being a preprocessor stmt line.
 455         for stmtRe in stmtRes:
 456             match = stmtRe.match(line)
 457             if match:
 458                 break
 459         else:
 460             match = None
 461
 462         if match:
 463
 464             # Remove contents after ## (comment)
 465             idx=line.find("##")
 466             if(idx>0):
 467                 line = line[0:idx]
 468
 469             op = match.group("op")
 470             log.debug("%r stmt (states: %r)", op, states)
 471             if op == "define":
 472                 if not (states and states[-1][0] == SKIP):
 473                     var, val = match.group("var", "val")
 474                     val = SubstituteInternal(str(val), defines)
 475
 476                     if val is None:
 477                         val = None
 478                     else:
 479                         try:
 480                             val = eval(val, {}, {})
 481                         except:
 482                             pass
 483                     defines[var] = val
 484             elif op == "undef":
 485                 if not (states and states[-1][0] == SKIP):
 486                     var = match.group("var")
 487                     try:
 488                         del defines[var]
 489                     except KeyError:
 490                         pass
 491             elif op == "include":
 492                 if not (states and states[-1][0] == SKIP):
 493                     if "var" in match.groupdict():
 494                         # This is the second include form: #include VAR
 495                         var = match.group("var")
 496                         f = defines[var]
 497                     else:
 498                         # This is the first include form: #include "path"
 499                         f = match.group("fname")
 500
 501                     for d in [os.path.dirname(infile)] + includePath:
 502                         fname = os.path.normpath(os.path.join(d, f))
 503                         if os.path.exists(fname):
 504                             break
 505                     else:
 506                         raise PreprocessError("could not find #include'd file "\
 507                                               "\"%s\" on include path: %r"\
 508                                               % (f, includePath))
 509                     defines = gmxpreprocess(fname, fout, defines, force,
 510                                             keepLines, includePath, substitute,
 511                                             contentTypesRegistry=contentTypesRegistry,
 512                                             __preprocessedFiles=__preprocessedFiles)
 513             elif op in ("if", "ifdef", "ifndef"):
 514                 if op == "if":
 515                     expr = match.group("expr")
 516                 elif op == "ifdef":
 517                     expr = "defined('%s')" % match.group("expr")
 518                 elif op == "ifndef":
 519                     expr = "not defined('%s')" % match.group("expr")
 520                 try:
 521                     if states and states[-1][0] == SKIP:
 522                         # Were are nested in a SKIP-portion of an if-block.
 523                         states.append((SKIP, 0, 0))
 524                     elif _evaluate(expr, defines):
 525                         states.append((EMIT, 1, 0))
 526                     else:
 527                         states.append((SKIP, 0, 0))
 528                 except KeyError:
 529                     raise PreprocessError("use of undefined variable in "\
 530                                           "#%s stmt" % op, defines['__FILE__'],
 531                                           defines['__LINE__'], line)
 532             elif op == "elif":
 533                 expr = match.group("expr")
 534                 try:
 535                     if states[-1][2]: # already had #else in this if-block
 536                         raise PreprocessError("illegal #elif after #else in "\
 537                             "same #if block", defines['__FILE__'],
 538                             defines['__LINE__'], line)
 539                     elif states[-1][1]: # if have emitted in this if-block
 540                         states[-1] = (SKIP, 1, 0)
 541                     elif states[:-1] and states[-2][0] == SKIP:
 542                         # Were are nested in a SKIP-portion of an if-block.
 543                         states[-1] = (SKIP, 0, 0)
 544                     elif _evaluate(expr, defines):
 545                         states[-1] = (EMIT, 1, 0)
 546                     else:
 547                         states[-1] = (SKIP, 0, 0)
 548                 except IndexError:
 549                     raise PreprocessError("#elif stmt without leading #if "\
 550                                           "stmt", defines['__FILE__'],
 551                                           defines['__LINE__'], line)
 552             elif op == "else":
 553                 try:
 554                     if states[-1][2]: # already had #else in this if-block
 555                         raise PreprocessError("illegal #else after #else in "\
 556                             "same #if block", defines['__FILE__'],
 557                             defines['__LINE__'], line)
 558                     elif states[-1][1]: # if have emitted in this if-block
 559                         states[-1] = (SKIP, 1, 1)
 560                     elif states[:-1] and states[-2][0] == SKIP:
 561                         # Were are nested in a SKIP-portion of an if-block.
 562                         states[-1] = (SKIP, 0, 1)
 563                     else:
 564                         states[-1] = (EMIT, 1, 1)
 565                 except IndexError:
 566                     raise PreprocessError("#else stmt without leading #if "\
 567                                           "stmt", defines['__FILE__'],
 568                                           defines['__LINE__'], line)
 569             elif op == "endif":
 570                 try:
 571                     states.pop()
 572                 except IndexError:
 573                     raise PreprocessError("#endif stmt without leading #if"\
 574                                           "stmt", defines['__FILE__'],
 575                                           defines['__LINE__'], line)
 576             elif op == "for":
 577
 578                 tmpstr     = match.group("var")
 579                 thisforvar = tmpstr.split(",")
 580                 for s in thisforvar:
 581                     s.strip(" ")
 582
 583                 # Thisforvar is now a _list_ if 1 or more for variables, without whitespace
 584
 585                 # Evaluate the list-of-values just in case it refers to a list variable
 586                 valuelist = _evaluate(match.group("valuelist"),defines)
 587                 # If a string, evaluate it again
 588                 if(isinstance(valuelist,str)):
 589                     valuelist = eval(valuelist)
 590
 591                 forlevel += 1
 592
 593                 forvar[forlevel]       = thisforvar
 594                 forvaluelist[forlevel] = valuelist
 595                 forstartline[forlevel] = lineNum + 1
 596                 foriteration[forlevel] = 0
 597
 598                 if(len(valuelist)>0):
 599                     # set the variable for this for-loop to the first value in the list for this level
 600                     nvar=len(thisforvar)
 601                     for i in range(nvar):
 602                         if(nvar==1):
 603                             val=valuelist[0]
 604                         else:
 605                             val=valuelist[0][i]
 606                         defines[thisforvar[i]] = val
 607
 608                 else:
 609                     # list was empty, so skip this entire section
 610                     states.append((SKIP, 0, 0))
 611
 612             elif op == "endfor":
 613                 foriteration[forlevel] += 1
 614                 # Should we do one more iteration on this level?
 615                 iter       = foriteration[forlevel]
 616                 thisforvar = forvar[forlevel]
 617                 valuelist  = forvaluelist[forlevel]
 618
 619                 if(iter<len(valuelist)):
 620
 621                     nvar = len(thisforvar)
 622                     for i in range(len(thisforvar)):
 623                         if(nvar==1):
 624                            val=valuelist[iter]
 625                         else:
 626                             val=valuelist[iter][i]
 627                         defines[thisforvar[i]] = val
 628
 629                     lineNum             = forstartline[forlevel]
 630                     continue
 631                 else:
 632                     forlevel -= 1
 633                     if(len(valuelist)==0):
 634                         states.pop()
 635
 636             elif op == "error":
 637                 if not (states and states[-1][0] == SKIP):
 638                     error = match.group("error")
 639                     raise PreprocessError("#error: "+error, defines['__FILE__'],
 640                                           defines['__LINE__'], line)
 641             log.debug("states: %r", states)
 642             if keepLines:
 643                 fout.write("\n")
 644         else:
 645             try:
 646                 if states[-1][0] == EMIT:
 647                     log.debug("emit line (%s)" % states[-1][1])
 648                     # Substitute all defines into line.
 649                     # XXX Should avoid recursive substitutions. But that
 650                     #     would be a pain right now.
 651
 652                     sline = line
 653                     if substitute:
 654                         sline = SubstituteInCode(sline,defines)
 655
 656                     emitted_line_is_blank = (sline.strip()=='')
 657                     if( not (emitted_line_is_blank and last_emitted_was_blank) and not keepLines):
 658                         fout.write(sline)
 659                         last_emitted_was_blank = emitted_line_is_blank
 660
 661                 elif keepLines:
 662                     log.debug("keep blank line (%s)" % states[-1][1])
 663                     fout.write("\n")
 664                 else:
 665                     log.debug("skip line (%s)" % states[-1][1])
 666             except IndexError:
 667                 raise PreprocessError("superfluous #endif before this line",
 668                                       defines['__FILE__'],
 669                                       defines['__LINE__'])
 670         lineNum += 1
 671
 672     if len(states) > 1:
 673         raise PreprocessError("unterminated #if block", defines['__FILE__'],
 674                               defines['__LINE__'])
 675     elif len(states) < 1:
 676         raise PreprocessError("superfluous #endif on or before this line",
 677                               defines['__FILE__'], defines['__LINE__'])
 678
 679     if fout != outfile:
 680         fout.close()
 681
 682     return defines
 683
 684
 685 #---- content-type handling
 686
# Built-in content-type table. The text uses the format explained in its own
# leading comment lines; ContentTypesRegistry._load() hands it to
# _loadContentType().
_gDefaultContentTypes = """
    # Default file types understood by "gmxpreprocess.py".
    #
    # Format is an extension of 'mime.types' file syntax.
    #   - '#' indicates a comment to the end of the line.
    #   - a line is:
    #       <filetype> [<pattern>...]
    #     where,
    #       <filetype>'s are equivalent in spirit to the names used in the Windows
    #           registry in HKCR, but some of those names suck or are inconsistent;
    #           and
    #       <pattern> is a suffix (pattern starts with a '.'), a regular expression
    #           (pattern is enclosed in '/' characters), a full filename (anything
    #           else).
    #
    # Notes on case-sensitivity:
    #
    # A suffix pattern is case-insensitive on Windows and case-sensitive
    # elsewhere.  A filename pattern is case-sensitive everywhere. A regex
    # pattern's case-sensitivity is defined by the regex. This means it is by
    # default case-sensitive, but this can be changed using Python's inline
    # regex option syntax. E.g.:
    #         Makefile            /^(?i)makefile.*$/   # case-INsensitive regex

    Python              .py
    Python              .pyw
    Perl                .pl
    Ruby                .rb
    Tcl                 .tcl
    XML                 .xml
    XML                 .kpf
    XML                 .xul
    XML                 .rdf
    XML                 .xslt
    XML                 .xsl
    XML                 .wxs
    XML                 .wxi
    HTML                .htm
    HTML                .html
    XML                 .xhtml
    Makefile            /^[Mm]akefile.*$/
    PHP                 .php
    JavaScript          .js
    CSS                 .css
    C++                 .c       # C++ because then we can use //-style comments
    C++                 .cpp
    C++                 .cxx
    C++                 .cc
    C++                 .h
    C++                 .hpp
    C++                 .hxx
    C++                 .hh
    C++                 .gpp     # Gromacs pre-preprocessing
    IDL                 .idl
    Text                .txt
    Fortran             .f
    Fortran             .f90
    Shell               .sh
    Shell               .csh
    Shell               .ksh
    Shell               .zsh
    Java                .java
    C#                  .cs
    TeX                 .tex

    # Some Komodo-specific file extensions
    Python              .ksf  # Fonts & Colors scheme files
    Text                .kkf  # Keybinding schemes files
"""
 756
 757 class ContentTypesRegistry:
 758     """A class that handles determining the filetype of a given path.
 759
 760     Usage:
 761         >>> registry = ContentTypesRegistry()
 762         >>> registry.getContentType("foo.py")
 763         "Python"
 764     """
 765
 766     def __init__(self, contentTypesPaths=None):
 767         """The constructor.
 768
 769         @param contentTypesPaths {str} Optional path to content.types file.
 770         """
 771         ## Path to content.types file to decide language
 772         self.contentTypesPaths = contentTypesPaths
 773         self._load()
 774
 775     def _load(self):
 776         from os.path import dirname, join, exists
 777
 778         ## initialize map of file suffixes to language
 779         self.suffixMap = {}
 780         ## initialize map of filename regex to language
 781         self.regexMap = {}
 782         ## initialize map of filenames to language
 783         self.filenameMap = {}
 784
 785         self._loadContentType(_gDefaultContentTypes)
 786         localContentTypesPath = join(dirname(__file__), "content.types")
 787         if exists(localContentTypesPath):
 788             log.debug("load content types file: `%r'" % localContentTypesPath)
 789             self._loadContentType(open(localContentTypesPath, 'r').read())
 790         for path in (self.contentTypesPaths or []):
 791             log.debug("load content types file: `%r'" % path)
 792             self._loadContentType(open(path, 'r').read())
 793
 794     def _loadContentType(self, content, path=None):
 795         """Return the registry for the given content.types file.
 796
 797         The registry is three mappings:
 798             <suffix> -> <content type>
 799             <regex> -> <content type>
 800             <filename> -> <content type>
 801         """
 802         for line in content.splitlines(0):
 803             words = line.strip().split()
 804             for i in range(len(words)):
 805                 if words[i][0] == '#':
 806                     del words[i:]
 807                     break
 808             if not words: continue
 809             contentType, patterns = words[0], words[1:]
 810             if not patterns:
 811                 if line[-1] == '\n': line = line[:-1]
 812                 raise PreprocessError("bogus content.types line, there must "\
 813                                       "be one or more patterns: '%s'" % line)
 814             for pattern in patterns:
 815                 if pattern.startswith('.'):
 816                     if sys.platform.startswith("win"):
 817                         # Suffix patterns are case-insensitive on Windows.
 818                         pattern = pattern.lower()
 819                     self.suffixMap[pattern] = contentType
 820                 elif pattern.startswith('/') and pattern.endswith('/'):
 821                     self.regexMap[re.compile(pattern[1:-1])] = contentType
 822                 else:
 823                     self.filenameMap[pattern] = contentType
 824
 825     def getContentType(self, path):
 826         """Return a content type for the given path.
 827
 828         @param path {str} The path of file for which to guess the
 829             content type.
 830         @returns {str|None} Returns None if could not determine the
 831             content type.
 832         """
 833         basename = os.path.basename(path)
 834         contentType = None
 835         # Try to determine from the path.
 836         if not contentType and self.filenameMap.has_key(basename):
 837             contentType = self.filenameMap[basename]
 838             log.debug("Content type of '%s' is '%s' (determined from full "\
 839                       "path).", path, contentType)
 840         # Try to determine from the suffix.
 841         if not contentType and '.' in basename:
 842             suffix = "." + basename.split(".")[-1]
 843             if sys.platform.startswith("win"):
 844                 # Suffix patterns are case-insensitive on Windows.
 845                 suffix = suffix.lower()
 846             if self.suffixMap.has_key(suffix):
 847                 contentType = self.suffixMap[suffix]
 848                 log.debug("Content type of '%s' is '%s' (determined from "\
 849                           "suffix '%s').", path, contentType, suffix)
 850         # Try to determine from the registered set of regex patterns.
 851         if not contentType:
 852             for regex, ctype in self.regexMap.items():
 853                 if regex.search(basename):
 854                     contentType = ctype
 855                     log.debug("Content type of '%s' is '%s' (matches regex '%s')",
 856                               path, contentType, regex.pattern)
 857                     break
 858         # Try to determine from the file contents.
 859         content = open(path, 'rb').read()
 860         if content.startswith("<?xml"):  # cheap XML sniffing
 861             contentType = "XML"
 862         return contentType
 863
 864 _gDefaultContentTypesRegistry = None
 865 def getDefaultContentTypesRegistry():
 866     global _gDefaultContentTypesRegistry
 867     if _gDefaultContentTypesRegistry is None:
 868         _gDefaultContentTypesRegistry = ContentTypesRegistry()
 869     return _gDefaultContentTypesRegistry
 870
 871
 872 #---- internal support stuff
 873 #TODO: move other internal stuff down to this section
 874
 875 try:
 876     reversed
 877 except NameError:
 878     # 'reversed' added in Python 2.4 (http://www.python.org/doc/2.4/whatsnew/node7.html)
 879     def reversed(seq):
 880         rseq = list(seq)
 881         rseq.reverse()
 882         for item in rseq:
 883             yield item
 884 try:
 885     sorted
 886 except NameError:
 887     # 'sorted' added in Python 2.4. Note that I'm only implementing enough
 888     # of sorted as is used in this module.
 889     def sorted(seq, key=None):
 890         identity = lambda x: x
 891         key_func = (key or identity)
 892         sseq = list(seq)
 893         sseq.sort(lambda self, other: cmp(key_func(self), key_func(other)))
 894         for item in sseq:
 895             yield item
 896
 897
 898 #---- mainline
 899
 900 def main(argv):
 901     try:
 902         optlist, args = getopt.getopt(argv[1:], 'hVvo:D:fkI:sc:',
 903             ['help', 'version', 'verbose', 'force', 'keep-lines',
 904              'no-substitute', 'content-types-path='])
 905     except getopt.GetoptError, msg:
 906         sys.stderr.write("gmxpreprocess: error: %s. Your invocation was: %s\n"\
 907                          % (msg, argv))
 908         sys.stderr.write("See 'gmxpreprocess --help'.\n")
 909         return 1
 910     outfile = sys.stdout
 911     defines = {}
 912     force = 0
 913     keepLines = 0
 914     substitute = 1
 915     includePath = []
 916     contentTypesPaths = []
 917     for opt, optarg in optlist:
 918         if opt in ('-h', '--help'):
 919             sys.stdout.write(__doc__)
 920             return 0
 921         elif opt in ('-V', '--version'):
 922             sys.stdout.write("gmxpreprocess %s\n" % __version__)
 923             return 0
 924         elif opt in ('-v', '--verbose'):
 925             log.setLevel(log.DEBUG)
 926         elif opt == '-o':
 927             outfile = optarg
 928         if opt in ('-f', '--force'):
 929             force = 1
 930         elif opt == '-D':
 931             if optarg.find('=') != -1:
 932                 var, val = optarg.split('=', 1)
 933                 try:
 934                     val = int(val)
 935                 except ValueError:
 936                     pass
 937             else:
 938                 var, val = optarg, None
 939             defines[var] = val
 940         elif opt in ('-k', '--keep-lines'):
 941             keepLines = 1
 942         elif opt == '-I':
 943             includePath.append(optarg)
 944         elif opt in ('-s', '--no-substitute'):
 945             substitute = 0
 946         elif opt in ('-c', '--content-types-path'):
 947             contentTypesPaths.append(optarg)
 948
 949     if len(args) != 1:
 950         sys.stderr.write("gmxpreprocess: error: incorrect number of "\
 951                          "arguments: argv=%r\n" % argv)
 952         return 1
 953     else:
 954         infile = args[0]
 955
 956     try:
 957         contentTypesRegistry = ContentTypesRegistry(contentTypesPaths)
 958         gmxpreprocess(infile, outfile, defines, force, keepLines, includePath,
 959                    substitute, contentTypesRegistry=contentTypesRegistry)
 960     except PreprocessError, ex:
 961         if log.isDebugEnabled():
 962             import traceback
 963             traceback.print_exc(file=sys.stderr)
 964         else:
 965             sys.stderr.write("gmxpreprocess: error: %s\n" % str(ex))
 966         return 1
 967
 968 if __name__ == "__main__":
 969     __file__ = sys.argv[0]
 970     sys.exit( main(sys.argv) )
 971