prepareGpuKernelArguments() and launchGpuKernel() are added
[gromacs.git] / src / gromacs / gmxlib / nonbonded / preprocessor / gmxpreprocess.py
blob8bf709048f185b238ac5d165c4285f71f9a88283
1 #!/usr/bin/env python
2 # Copyright (c) 2002-2008 ActiveState Software Inc.
3 # License: MIT License (http://www.opensource.org/licenses/mit-license.php)
4 # Original filename preprocess.py, see http://code.google.com/p/preprocess/
6 # Modified by Erik Lindahl 2009-2012 <lindahl@gromacs.org>
7 # to enable advanced preprocessing for Gromacs kernels, including
8 # preprocessor for-loops and substitution into preprocessor directives
9 # as well as program strings.
11 # Please feel free to redistribute under same license as original (MIT),
12 # but don't blame the original authors for mistakes in this version.
15 """
16 Preprocess a file.
18 Command Line Usage:
19 gmxpreprocess [<options>...] <infile>
21 Options:
22 -h, --help Print this help and exit.
23 -V, --version Print the version info and exit.
24 -v, --verbose Give verbose output for errors.
26 -o <outfile> Write output to the given file instead of to stdout.
27 -f, --force Overwrite given output file. (Otherwise an IOError
28 will be raised if <outfile> already exists.)
29 -D <define> Define a variable for preprocessing. <define>
30 can simply be a variable name (in which case it
31 will be true) or it can be of the form
32 <var>=<val>. An attempt will be made to convert
33 <val> to an integer so "-D FOO=0" will create a
34 false value.
35 -I <dir> Add a directory to the include path for
36 #include directives.
38 -k, --keep-lines Emit empty lines for preprocessor statement
39 lines and skipped output lines. This allows line
40 numbers to stay constant.
41 -s, --no-substitute Do NOT Substitute defines into emitted lines.
42 -c, --content-types-path <path>
43 Specify a path to a content.types file to assist
44 with filetype determination. See the
45 `_gDefaultContentTypes` string in this file for
46 details on its format.
48 Module Usage:
49 from gmxpreprocess import gmxpreprocess
50 gmxpreprocess(infile, outfile=sys.stdout, defines={}, force=0,
51 keepLines=0, includePath=[], substitute=1,
52 contentType=None)
54 The <infile> can be marked up with special preprocessor statement lines
55 of the form:
56 <comment-prefix> <preprocessor-statement> <comment-suffix>
57 where the <comment-prefix/suffix> are the native comment delimiters for
58 that file type.
61 Examples
62 --------
64 HTML (*.htm, *.html) or XML (*.xml, *.kpf, *.xul) files:
66 <!-- #if FOO -->
67 ...
68 <!-- #endif -->
70 Python (*.py), Perl (*.pl), Tcl (*.tcl), Ruby (*.rb), Bash (*.sh),
71 or make ([Mm]akefile*) files:
73 # #if defined('FAV_COLOR') and FAV_COLOR == "blue"
74 ...
75 # #elif FAV_COLOR == "red"
76 ...
77 # #else
78 ...
79 # #endif
81 C (*.c, *.h), C++ (*.cpp, *.cxx, *.cc, *.h, *.hpp, *.hxx, *.hh),
82 Java (*.java), PHP (*.php) or C# (*.cs) files:
84 // #define FAV_COLOR 'blue'
85 ...
86 /* #ifndef FAV_COLOR */
87 ...
88 // #endif
90 Fortran 77 (*.f) or 90/95 (*.f90) files:
92 C #if COEFF == 'var'
93 ...
94 C #endif
96 And other languages.
99 Preprocessor Syntax
100 -------------------
102 - Valid statements:
103 #define <var> [<value>]
104 #undef <var>
105 #ifdef <var>
106 #ifndef <var>
107 #if <expr>
108 #elif <expr>
109 #else
110 #endif
111 #error <error string>
112 #include "<file>"
113 #include <var>
114 where <expr> is any valid Python expression.
115 - The expression after #if/elif may be a Python statement. It is an
116 error to refer to a variable that has not been defined by a -D
117 option or by an in-content #define.
118 - Special built-in methods for expressions:
119 defined(varName) Return true if given variable is defined.
122 Tips
123 ----
125 A suggested file naming convention is to let input files to
126 preprocess be of the form <basename>.p.<ext> and direct the output
127 of preprocess to <basename>.<ext>, e.g.:
128 preprocess -o foo.py foo.p.py
129 The advantage is that other tools (esp. editors) will still
130 recognize the unpreprocessed file as the original language.
133 __version_info__ = (1, 1, 0)
134 __version__ = '.'.join(map(str, __version_info__))
136 import os
137 import sys
138 import getopt
139 import types
140 import re
141 import pprint
145 #---- exceptions
class PreprocessError(Exception):
    """Error raised when preprocessing a file fails.

    Besides the message, the exception records the optional file name,
    line number and line text it refers to.  str() renders it as
    "<file>:<lineno>: <message>", leaving out whichever parts are unset.
    """

    def __init__(self, errmsg, file=None, lineno=None, line=None):
        Exception.__init__(self, errmsg, file, lineno, line)
        self.errmsg = str(errmsg)
        self.file = file
        self.lineno = lineno
        self.line = line

    def __str__(self):
        pieces = []
        if self.file is not None:
            pieces.append(self.file + ":")
        if self.lineno is not None:
            pieces.append(str(self.lineno) + ":")
        if pieces:
            # Separate the location prefix from the message.
            pieces.append(" ")
        pieces.append(self.errmsg)
        return "".join(pieces)
169 #---- global data
171 # Comment delimiter info.
172 # A mapping of content type to a list of 2-tuples defining the line
173 # prefix and suffix for a comment. Each prefix or suffix can either
174 # be a string (in which case it is transformed into a pattern allowing
175 # whitespace on either side) or a compiled regex.
# Comment delimiters per content type.  Each entry is a list of
# (prefix, suffix) pairs; a pair member is either a plain string or a
# compiled regex (see the Fortran entry).
_commentGroups = {
    "Python":     [('#', '')],
    "Perl":       [('#', '')],
    "PHP":        [('/*', '*/'), ('//', ''), ('#', '')],
    "Ruby":       [('#', '')],
    "Tcl":        [('#', '')],
    "Shell":      [('#', '')],
    # Allowing for CSS and JavaScript comments in XML/HTML.
    "XML":        [('<!--', '-->'), ('/*', '*/'), ('//', '')],
    "HTML":       [('<!--', '-->'), ('/*', '*/'), ('//', '')],
    "Makefile":   [('#', '')],
    "JavaScript": [('/*', '*/'), ('//', '')],
    "CSS":        [('/*', '*/')],
    "C":          [('/*', '*/')],
    "C++":        [('/*', '*/'), ('//', '')],
    "Java":       [('/*', '*/'), ('//', '')],
    "C#":         [('/*', '*/'), ('//', '')],
    "IDL":        [('/*', '*/'), ('//', '')],
    "Text":       [('#', '')],
    "Fortran":    [(re.compile(r'^[a-zA-Z*$]\s*'), ''), ('!', '')],
    "TeX":        [('%', '')],
}
201 #---- internal logging facility
class _Logger:
    """Minimal leveled logger.

    Messages at or above the configured level are written to the stream
    as "<name>: <level>: <message>" followed by a newline.
    """
    DEBUG, INFO, WARN, ERROR, CRITICAL = range(5)

    def __init__(self, name, level=None, streamOrFileName=sys.stderr):
        """Create a logger.

        "name" prefixes every message.
        "level" is the minimum level emitted (default: WARN).
        "streamOrFileName" is either a writable stream or a file name; a
            file name is opened for writing and closed when the logger is
            destroyed.
        """
        self._name = name
        # Set before open() so that __del__ is safe even if open() raises.
        self._opennedStream = 0
        if level is None:
            self.level = self.WARN
        else:
            self.level = level
        if isinstance(streamOrFileName, str):
            self.stream = open(streamOrFileName, 'w')
            self._opennedStream = 1
        else:
            self.stream = streamOrFileName

    def __del__(self):
        # Only close streams this logger opened itself.
        if getattr(self, "_opennedStream", 0):
            self.stream.close()

    def getLevel(self):
        return self.level

    def setLevel(self, level):
        self.level = level

    def _getLevelName(self, level):
        levelNameMap = {
            self.DEBUG: "DEBUG",
            self.INFO: "INFO",
            self.WARN: "WARN",
            self.ERROR: "ERROR",
            self.CRITICAL: "CRITICAL",
        }
        return levelNameMap[level]

    def isEnabled(self, level):
        return level >= self.level

    def isDebugEnabled(self): return self.isEnabled(self.DEBUG)
    def isInfoEnabled(self): return self.isEnabled(self.INFO)
    def isWarnEnabled(self): return self.isEnabled(self.WARN)
    def isErrorEnabled(self): return self.isEnabled(self.ERROR)
    # fatal() logs at CRITICAL; there is no separate FATAL level.
    def isFatalEnabled(self): return self.isEnabled(self.CRITICAL)

    def log(self, level, msg, *args):
        """Write "msg" (%-formatted with "args", if any) at "level"."""
        if level < self.level:
            return
        if args:
            # Format only when arguments are supplied, so a pre-formatted
            # message containing a literal '%' cannot raise here.
            msg = msg % args
        message = "%s: %s: " % (self._name, self._getLevelName(level).lower())
        message = message + msg + "\n"
        self.stream.write(message)
        self.stream.flush()

    def debug(self, msg, *args):
        self.log(self.DEBUG, msg, *args)

    def info(self, msg, *args):
        self.log(self.INFO, msg, *args)

    def warn(self, msg, *args):
        self.log(self.WARN, msg, *args)

    def error(self, msg, *args):
        self.log(self.ERROR, msg, *args)

    def fatal(self, msg, *args):
        self.log(self.CRITICAL, msg, *args)


log = _Logger("gmxpreprocess", _Logger.WARN)
262 #---- internal support stuff
def SubstituteInternal(expr, defines):
    """Replace every occurrence of a define name in expr by its value.

    Names are tried longest first, and the whole pass is repeated until
    the text no longer changes, so values that themselves contain define
    names are expanded too.  Returns the resulting string.
    """
    previous = ''
    while True:
        if expr == previous:
            return expr
        previous = expr
        for key in reversed(sorted(defines, key=len)):
            replacement = str(defines[key])
            expr = expr.replace(key, replacement)
def SubstituteInCode(expr, defines):
    """Replace every "{name}" placeholder in expr by the value of name.

    Only names wrapped in curly braces are touched.  Names are tried
    longest first, and the pass is repeated until the text stops
    changing, so nested placeholders are resolved as well.  Returns the
    resulting string.
    """
    previous = ''
    while True:
        if expr == previous:
            return expr
        previous = expr
        for key in reversed(sorted(defines, key=len)):
            placeholder = '{' + key + '}'
            expr = expr.replace(placeholder, str(defines[key]))
def _evaluate(expr, defines):
    """Evaluate the given expression string with the given context.

    "expr" is a Python expression.  Names in it resolve against
    "defines"; in addition defined(name) returns true if "name" is a key
    of "defines".

    Returns the value of the expression.  Any exception raised while
    evaluating is re-raised as PreprocessError, tagged with the
    '__FILE__' and '__LINE__' entries of "defines" when they are present.

    WARNING: This runs eval() on a user string. This is unsafe.
    """
    try:
        rv = eval(expr, {'defined': lambda v: v in defines}, defines)
    except Exception as ex:
        msg = str(ex)
        if msg.startswith("name '") and msg.endswith("' is not defined"):
            # A common error (at least this is presumed:) is to have
            #   defined(FOO)   instead of   defined('FOO')
            # We should give a little as to what might be wrong.
            # msg == "name 'FOO' is not defined"  -->  varName == "FOO"
            varName = msg[len("name '"):-len("' is not defined")]
            if expr.find("defined(%s)" % varName) != -1:
                # "defined(FOO)" in expr instead of "defined('FOO')"
                msg += " (perhaps you want \"defined('%s')\" instead of "\
                       "\"defined(%s)\")" % (varName, varName)
        elif msg.startswith("invalid syntax"):
            msg = "invalid syntax: '%s'" % expr
        # .get(): a missing location entry must not mask the real error
        # with a KeyError.
        raise PreprocessError(msg, defines.get('__FILE__'),
                              defines.get('__LINE__'))
    log.debug("evaluate %r -> %s (defines=%r)", expr, rv, defines)
    return rv
311 #---- module API
def _isStringLike(obj):
    """Return true if obj is a string (i.e. a path rather than a stream)."""
    try:
        return isinstance(obj, basestring)  # Python 2: str and unicode
    except NameError:
        return isinstance(obj, str)


def _compileStatementRegexes(commentGroups):
    """Compile one regex per (statement form, comment delimiter pair)."""
    # Basic format of a statement line:
    #       <comment-prefix> <preprocessor-stmt> <comment-suffix>
    # Examples:
    #       <!-- #if foo -->
    #       ...
    #       <!-- #endif -->
    #
    #       # #if BAR
    #       ...
    #       # #else
    #       ...
    #       # #endif
    stmts = [r'##\s*(?P<op>.*?)',
             r'#\s*(?P<op>if|elif|ifdef|ifndef)\s+(?P<expr>.*?)',
             r'#\s*(?P<op>else|endif)',
             r'#\s*(?P<op>error)\s+(?P<error>.*?)',
             r'#\s*(?P<op>define)\s+(?P<var>[^\s]*?)(\s+(?P<val>.+?))?',
             r'#\s*(?P<op>undef)\s+(?P<var>[^\s]*?)',
             r'#\s*(?P<op>for)\s+(?P<var>.*?)\s+((in)|(IN))\s+(?P<valuelist>.*?)',
             r'#\s*(?P<op>endfor)',
             r'#\s*(?P<op>include)\s+"(?P<fname>.*?)"',
             r'#\s*(?P<op>include)\s+(?P<var>[^\s]+?)',
             ]
    regexes = []
    for stmt in stmts:
        # The comment group prefix and suffix can either be just a
        # string or a compiled regex.
        for cprefix, csuffix in commentGroups:
            if hasattr(cprefix, "pattern"):
                pattern = cprefix.pattern
            else:
                pattern = r"^\s*%s\s*" % re.escape(cprefix)
            pattern += stmt
            if hasattr(csuffix, "pattern"):
                pattern += csuffix.pattern
            else:
                pattern += r"\s*%s\s*$" % re.escape(csuffix)
            regexes.append(re.compile(pattern))
    return regexes


def _readLogicalLines(path):
    """Read path and fold backslash-continued lines into their first line."""
    with open(path, 'r') as fin:
        lines = fin.readlines()
    # Walk backwards so that a chain of continuations collapses into the
    # line that starts it.
    for i in range(len(lines) - 1, -1, -1):
        line = lines[i].rstrip(' \r\n')
        # A backslash ending the very last line has nothing to join with.
        if line.endswith('\\') and i + 1 < len(lines):
            lines[i] = line[:-1] + ' ' + lines[i + 1]
            lines[i + 1] = ''  # keep an empty line to avoid screwing up line numbers
    return lines


def _bindForVariables(defines, names, entry):
    """Assign one entry of a #for value list to the loop variable(s)."""
    if len(names) == 1:
        defines[names[0]] = entry
    else:
        for i, name in enumerate(names):
            defines[name] = entry[i]


def _processLines(infile, lines, stmtRes, fout, defines, force, keepLines,
                  includePath, substitute, contentTypesRegistry,
                  preprocessedFiles):
    """Run the statement state machine over lines, writing output to fout.

    Returns the dictionary of defines (as handed back by nested #include
    processing, if any).
    """
    defines['__FILE__'] = infile
    SKIP, EMIT = range(2)  # states
    states = [(EMIT,   # a state is (<emit-or-skip-lines-in-this-section>,
               0,      #             <have-emitted-in-this-if-block>,
               0)]     #             <have-seen-'else'-in-this-if-block>)
    lineNum = 0
    nlines = len(lines)
    # Per-nesting-level bookkeeping for #for loops, keyed by forlevel.
    forlevel = 0
    forvar = {}
    forvaluelist = {}
    forstartline = {}
    foriteration = {}
    last_emitted_was_blank = True

    while lineNum < nlines:
        line = lines[lineNum]
        log.debug("line %d: %r", lineNum+1, line)
        defines['__LINE__'] = lineNum+1

        # Is this line a preprocessor stmt line?
        #XXX Could probably speed this up by optimizing common case of
        #    line NOT being a preprocessor stmt line.
        match = None
        for stmtRe in stmtRes:
            match = stmtRe.match(line)
            if match:
                break

        if match:
            # Remove contents after ## (comment)
            idx = line.find("##")
            if idx > 0:
                line = line[0:idx]

            op = match.group("op")
            log.debug("%r stmt (states: %r)", op, states)
            skipping = bool(states) and states[-1][0] == SKIP
            if op == "define":
                if not skipping:
                    var, val = match.group("var", "val")
                    # A value-less #define stores None.  Do not push the
                    # text 'None' through substitution, where a define
                    # named e.g. "N" would corrupt it.
                    if val is not None:
                        val = SubstituteInternal(val, defines)
                        try:
                            val = eval(val, {}, {})
                        except Exception:
                            # Not a Python literal: keep the raw string.
                            pass
                    defines[var] = val
            elif op == "undef":
                if not skipping:
                    var = match.group("var")
                    defines.pop(var, None)
            elif op == "include":
                if not skipping:
                    if "var" in match.groupdict():
                        # This is the second include form: #include VAR
                        var = match.group("var")
                        try:
                            f = defines[var]
                        except KeyError:
                            raise PreprocessError(
                                "use of undefined variable '%s' in "
                                "#include stmt" % var, defines['__FILE__'],
                                defines['__LINE__'], line)
                    else:
                        # This is the first include form: #include "path"
                        f = match.group("fname")

                    for d in [os.path.dirname(infile)] + includePath:
                        fname = os.path.normpath(os.path.join(d, f))
                        if os.path.exists(fname):
                            break
                    else:
                        raise PreprocessError("could not find #include'd file "\
                                              "\"%s\" on include path: %r"\
                                              % (f, includePath))
                    defines = gmxpreprocess(fname, fout, defines, force,
                                            keepLines, includePath, substitute,
                                            contentTypesRegistry=contentTypesRegistry,
                                            __preprocessedFiles=preprocessedFiles)
                    # The nested call pointed __FILE__ at the included
                    # file; later errors here must name this file again.
                    defines['__FILE__'] = infile
            elif op in ("if", "ifdef", "ifndef"):
                if op == "if":
                    expr = match.group("expr")
                elif op == "ifdef":
                    expr = "defined('%s')" % match.group("expr")
                elif op == "ifndef":
                    expr = "not defined('%s')" % match.group("expr")
                try:
                    if skipping:
                        # Were are nested in a SKIP-portion of an if-block.
                        states.append((SKIP, 0, 0))
                    elif _evaluate(expr, defines):
                        states.append((EMIT, 1, 0))
                    else:
                        states.append((SKIP, 0, 0))
                except KeyError:
                    raise PreprocessError("use of undefined variable in "\
                                          "#%s stmt" % op, defines['__FILE__'],
                                          defines['__LINE__'], line)
            elif op == "elif":
                expr = match.group("expr")
                try:
                    if states[-1][2]:  # already had #else in this if-block
                        raise PreprocessError("illegal #elif after #else in "\
                            "same #if block", defines['__FILE__'],
                            defines['__LINE__'], line)
                    elif states[-1][1]:  # if have emitted in this if-block
                        states[-1] = (SKIP, 1, 0)
                    elif states[:-1] and states[-2][0] == SKIP:
                        # Were are nested in a SKIP-portion of an if-block.
                        states[-1] = (SKIP, 0, 0)
                    elif _evaluate(expr, defines):
                        states[-1] = (EMIT, 1, 0)
                    else:
                        states[-1] = (SKIP, 0, 0)
                except IndexError:
                    raise PreprocessError("#elif stmt without leading #if "\
                                          "stmt", defines['__FILE__'],
                                          defines['__LINE__'], line)
            elif op == "else":
                try:
                    if states[-1][2]:  # already had #else in this if-block
                        raise PreprocessError("illegal #else after #else in "\
                            "same #if block", defines['__FILE__'],
                            defines['__LINE__'], line)
                    elif states[-1][1]:  # if have emitted in this if-block
                        states[-1] = (SKIP, 1, 1)
                    elif states[:-1] and states[-2][0] == SKIP:
                        # Were are nested in a SKIP-portion of an if-block.
                        states[-1] = (SKIP, 0, 1)
                    else:
                        states[-1] = (EMIT, 1, 1)
                except IndexError:
                    raise PreprocessError("#else stmt without leading #if "\
                                          "stmt", defines['__FILE__'],
                                          defines['__LINE__'], line)
            elif op == "endif":
                try:
                    states.pop()
                except IndexError:
                    raise PreprocessError("#endif stmt without leading #if "\
                                          "stmt", defines['__FILE__'],
                                          defines['__LINE__'], line)
            elif op == "for":
                # NOTE(review): #for/#endfor are processed even inside a
                # skipped #if section -- confirm that this is intended.

                # One or more comma-separated loop variable names,
                # stripped of surrounding whitespace.
                thisforvar = [name.strip()
                              for name in match.group("var").split(",")]

                # Evaluate the list-of-values just in case it refers to a list variable
                valuelist = _evaluate(match.group("valuelist"), defines)
                # If a string, evaluate it again.
                # WARNING: eval() on text taken from the input file.
                if isinstance(valuelist, str):
                    valuelist = eval(valuelist)

                forlevel += 1

                forvar[forlevel] = thisforvar
                forvaluelist[forlevel] = valuelist
                forstartline[forlevel] = lineNum + 1
                foriteration[forlevel] = 0

                if len(valuelist) > 0:
                    # set the variable for this for-loop to the first value in the list for this level
                    _bindForVariables(defines, thisforvar, valuelist[0])
                else:
                    # list was empty, so skip this entire section
                    states.append((SKIP, 0, 0))

            elif op == "endfor":
                if forlevel < 1:
                    raise PreprocessError("#endfor stmt without leading #for "\
                                          "stmt", defines['__FILE__'],
                                          defines['__LINE__'], line)
                foriteration[forlevel] += 1
                # Should we do one more iteration on this level?
                iteration = foriteration[forlevel]
                thisforvar = forvar[forlevel]
                valuelist = forvaluelist[forlevel]

                if iteration < len(valuelist):
                    _bindForVariables(defines, thisforvar, valuelist[iteration])
                    # Jump back to the first line of the loop body.
                    lineNum = forstartline[forlevel]
                    continue
                else:
                    forlevel -= 1
                    if len(valuelist) == 0:
                        # Drop the SKIP state pushed for the empty loop.
                        states.pop()

            elif op == "error":
                if not skipping:
                    error = match.group("error")
                    raise PreprocessError("#error: "+error, defines['__FILE__'],
                                          defines['__LINE__'], line)
            log.debug("states: %r", states)
            if keepLines:
                fout.write("\n")
        else:
            if not states:
                raise PreprocessError("superfluous #endif before this line",
                                      defines['__FILE__'],
                                      defines['__LINE__'])
            if states[-1][0] == EMIT:
                log.debug("emit line (%s)" % states[-1][1])
                # Substitute all defines into line.
                # XXX Should avoid recursive substitutions. But that
                #     would be a pain right now.
                sline = line
                if substitute:
                    sline = SubstituteInCode(sline, defines)

                emitted_line_is_blank = (sline.strip() == '')
                # Runs of blank lines are collapsed, except with
                # keepLines, where every line must be written so that
                # line numbers stay constant.
                if keepLines or not (emitted_line_is_blank
                                     and last_emitted_was_blank):
                    fout.write(sline)
                last_emitted_was_blank = emitted_line_is_blank
            elif keepLines:
                log.debug("keep blank line (%s)" % states[-1][1])
                fout.write("\n")
            else:
                log.debug("skip line (%s)" % states[-1][1])
        lineNum += 1

    if len(states) > 1:
        raise PreprocessError("unterminated #if block", defines['__FILE__'],
                              defines['__LINE__'])
    elif len(states) < 1:
        raise PreprocessError("superfluous #endif on or before this line",
                              defines['__FILE__'], defines['__LINE__'])
    return defines


def gmxpreprocess(infile, outfile=sys.stdout, defines=None,
                  force=0, keepLines=0, includePath=None, substitute=1,
                  contentType=None, contentTypesRegistry=None,
                  __preprocessedFiles=None):
    """Preprocess the given file.

    "infile" is the input path.
    "outfile" is the output path or stream (default is sys.stdout).
    "defines" is a dictionary of defined variables that will be
        understood in preprocessor statements. Keys must be strings.
        The dictionary is updated in place; a new one is created when
        none is given.
    "force" will remove an already existing outfile before writing it.
    "keepLines" will cause blank lines to be emitted for preprocessor lines
        and content lines that would otherwise be skipped.
    "includePath" is a list of directories to search for given #include
        directives. The directory of the file being processed is presumed.
    "substitute", if true, will allow substitution of "{name}"
        placeholders into emitted lines. (NOTE: This substitution will
        happen within program strings as well. This may not be what you
        expect.)
    "contentType" can be used to specify the content type of the input
        file. If not given, it will be guessed.
    "contentTypesRegistry" is an instance of ContentTypesRegistry. If not
        specified a default registry will be created.
    "__preprocessedFiles" (for internal use only) is used to ensure files
        are not recursively preprocessed.

    Returns the modified dictionary of defines or raises PreprocessError if
    there was some problem.
    """
    # Fresh containers per call: a shared default dict would leak
    # defines (e.g. __FILE__) from one call into the next.
    if defines is None:
        defines = {}
    if includePath is None:
        includePath = []
    if __preprocessedFiles is None:
        __preprocessedFiles = []
    log.info("preprocess(infile=%r, outfile=%r, defines=%r, force=%r, "\
             "keepLines=%r, includePath=%r, contentType=%r, "\
             "__preprocessedFiles=%r)", infile, outfile, defines, force,
             keepLines, includePath, contentType, __preprocessedFiles)
    absInfile = os.path.normpath(os.path.abspath(infile))
    if absInfile in __preprocessedFiles:
        raise PreprocessError("detected recursive #include of '%s'"\
                              % infile)
    __preprocessedFiles.append(absInfile)

    # Determine the content type and comment info for the input file.
    if contentType is None:
        registry = contentTypesRegistry or getDefaultContentTypesRegistry()
        contentType = registry.getContentType(infile)
        if contentType is None:
            contentType = "Text"
            log.warn("defaulting content type for '%s' to '%s'",
                     infile, contentType)
    try:
        cgs = _commentGroups[contentType]
    except KeyError:
        raise PreprocessError("don't know comment delimiters for content "\
                              "type '%s' (file '%s')"\
                              % (contentType, infile))

    stmtRes = _compileStatementRegexes(cgs)

    # Process the input file.
    # (Would be helpful if I knew anything about lexing and parsing
    # simple grammars.)
    lines = _readLogicalLines(infile)

    ownsOutput = _isStringLike(outfile)
    if ownsOutput:
        if force and os.path.exists(outfile):
            os.chmod(outfile, 0o777)
            os.remove(outfile)
        fout = open(outfile, 'w')
    else:
        fout = outfile

    try:
        return _processLines(infile, lines, stmtRes, fout, defines, force,
                             keepLines, includePath, substitute,
                             contentTypesRegistry, __preprocessedFiles)
    finally:
        # Close only what was opened here, also when an error is raised.
        if ownsOutput:
            fout.close()
685 #---- content-type handling
# Built-in content.types data, parsed by ContentTypesRegistry.
_gDefaultContentTypes = """
    # Default file types understood by "gmxpreprocess.py".
    #
    # Format is an extension of 'mime.types' file syntax.
    #   - '#' indicates a comment to the end of the line.
    #   - a line is:
    #       <filetype> [<pattern>...]
    #     where,
    #       <filetype>'s are equivalent in spirit to the names used in the Windows
    #           registry in HKCR, but some of those names suck or are inconsistent;
    #           and
    #       <pattern> is a suffix (pattern starts with a '.'), a regular expression
    #           (pattern is enclosed in '/' characters), a full filename (anything
    #           else).
    #
    # Notes on case-sensitivity:
    #
    # A suffix pattern is case-insensitive on Windows and case-sensitive
    # elsewhere.  A filename pattern is case-sensitive everywhere. A regex
    # pattern's case-sensitivity is defined by the regex. This means it is by
    # default case-sensitive, but this can be changed using Python's inline
    # regex option syntax. E.g.:
    #         Makefile            /^(?i)makefile.*$/   # case-INsensitive regex

    Python              .py
    Python              .pyw
    Perl                .pl
    Ruby                .rb
    Tcl                 .tcl
    XML                 .xml
    XML                 .kpf
    XML                 .xul
    XML                 .rdf
    XML                 .xslt
    XML                 .xsl
    XML                 .wxs
    XML                 .wxi
    HTML                .htm
    HTML                .html
    XML                 .xhtml
    Makefile            /^[Mm]akefile.*$/
    PHP                 .php
    JavaScript          .js
    CSS                 .css
    C++                 .c       # C++ because then we can use //-style comments
    C++                 .cpp
    C++                 .cxx
    C++                 .cc
    C++                 .h
    C++                 .hpp
    C++                 .hxx
    C++                 .hh
    C++                 .gpp     # Gromacs pre-preprocessing
    IDL                 .idl
    Text                .txt
    Fortran             .f
    Fortran             .f90
    Shell               .sh
    Shell               .csh
    Shell               .ksh
    Shell               .zsh
    Java                .java
    C#                  .cs
    TeX                 .tex

    # Some Komodo-specific file extensions
    Python              .ksf  # Fonts & Colors scheme files
    Text                .kkf  # Keybinding schemes files
"""
class ContentTypesRegistry:
    """A class that handles determining the filetype of a given path.

    Usage:
        >>> registry = ContentTypesRegistry()
        >>> registry.getContentType("foo.py")
        "Python"
    """

    def __init__(self, contentTypesPaths=None):
        """The constructor.

        @param contentTypesPaths {list} Optional list of paths to
            content.types files, loaded after the built-in defaults.
        """
        ## Paths to content.types files to decide language
        self.contentTypesPaths = contentTypesPaths
        self._load()

    def _load(self):
        """(Re)build the lookup maps from the defaults and extra files."""
        from os.path import dirname, join, exists

        ## initialize map of file suffixes to language
        self.suffixMap = {}
        ## initialize map of filename regex to language
        self.regexMap = {}
        ## initialize map of filenames to language
        self.filenameMap = {}

        self._loadContentType(_gDefaultContentTypes)
        # A "content.types" file next to this module is loaded if present.
        localContentTypesPath = join(dirname(__file__), "content.types")
        if exists(localContentTypesPath):
            log.debug("load content types file: `%r'" % localContentTypesPath)
            with open(localContentTypesPath, 'r') as f:
                self._loadContentType(f.read())
        for path in (self.contentTypesPaths or []):
            log.debug("load content types file: `%r'" % path)
            with open(path, 'r') as f:
                self._loadContentType(f.read())

    def _loadContentType(self, content, path=None):
        """Add the entries of the given content.types text to the registry.

        The registry is three mappings:
            <suffix> -> <content type>
            <regex> -> <content type>
            <filename> -> <content type>

        Raises PreprocessError for a line naming a type but no pattern.
        "path" is accepted but not used.
        """
        for line in content.splitlines(0):
            # Keep the words up to the first one starting a '#' comment.
            words = []
            for word in line.split():
                if word.startswith('#'):
                    break
                words.append(word)
            if not words:
                continue
            contentType, patterns = words[0], words[1:]
            if not patterns:
                raise PreprocessError("bogus content.types line, there must "\
                                      "be one or more patterns: '%s'" % line)
            for pattern in patterns:
                if pattern.startswith('.'):
                    if sys.platform.startswith("win"):
                        # Suffix patterns are case-insensitive on Windows.
                        pattern = pattern.lower()
                    self.suffixMap[pattern] = contentType
                elif pattern.startswith('/') and pattern.endswith('/'):
                    self.regexMap[re.compile(pattern[1:-1])] = contentType
                else:
                    self.filenameMap[pattern] = contentType

    def getContentType(self, path):
        """Return a content type for the given path.

        The file at "path" is opened to sniff for an XML declaration, so
        it must exist and be readable.

        @param path {str} The path of file for which to guess the
            content type.
        @returns {str|None} Returns None if could not determine the
            content type.
        """
        basename = os.path.basename(path)
        contentType = None
        # Try to determine from the path.
        if not contentType and basename in self.filenameMap:
            contentType = self.filenameMap[basename]
            log.debug("Content type of '%s' is '%s' (determined from full "\
                      "path).", path, contentType)
        # Try to determine from the suffix.
        if not contentType and '.' in basename:
            suffix = "." + basename.split(".")[-1]
            if sys.platform.startswith("win"):
                # Suffix patterns are case-insensitive on Windows.
                suffix = suffix.lower()
            if suffix in self.suffixMap:
                contentType = self.suffixMap[suffix]
                log.debug("Content type of '%s' is '%s' (determined from "\
                          "suffix '%s').", path, contentType, suffix)
        # Try to determine from the registered set of regex patterns.
        if not contentType:
            for regex, ctype in self.regexMap.items():
                if regex.search(basename):
                    contentType = ctype
                    log.debug("Content type of '%s' is '%s' (matches regex '%s')",
                              path, contentType, regex.pattern)
                    break
        # Try to determine from the file contents.  This overrides any
        # type found above.  Only the leading bytes are needed, and they
        # are compared as bytes because the file is opened in binary mode.
        with open(path, 'rb') as f:
            if f.read(5) == b"<?xml":  # cheap XML sniffing
                contentType = "XML"
        return contentType
# Registry shared by all callers that do not supply their own; built lazily.
_gDefaultContentTypesRegistry = None

def getDefaultContentTypesRegistry():
    """Return the shared ContentTypesRegistry, creating it on first use."""
    global _gDefaultContentTypesRegistry
    registry = _gDefaultContentTypesRegistry
    if registry is None:
        registry = ContentTypesRegistry()
        _gDefaultContentTypesRegistry = registry
    return registry
872 #---- internal support stuff
873 #TODO: move other internal stuff down to this section
try:
    reversed
except NameError:
    # 'reversed' added in Python 2.4 (http://www.python.org/doc/2.4/whatsnew/node7.html)
    def reversed(seq):
        """Yield the items of seq from last to first."""
        items = list(seq)
        position = len(items)
        while position > 0:
            position -= 1
            yield items[position]
try:
    sorted
except NameError:
    # 'sorted' added in Python 2.4. Note that I'm only implementing enough
    # of sorted as is used in this module.
    def sorted(seq, key=None):
        """Yield the items of seq in ascending order of key(item)."""
        if not key:
            key = lambda x: x
        ordered = list(seq)
        ordered.sort(lambda left, right: cmp(key(left), key(right)))
        for item in ordered:
            yield item
898 #---- mainline
def main(argv):
    """Command line entry point.

    "argv" is the full argument vector, program name first.

    Returns 0 on success (and after printing help or version info) and
    1 on a usage error or when preprocessing raises PreprocessError.
    """
    try:
        optlist, args = getopt.getopt(argv[1:], 'hVvo:D:fkI:sc:',
            ['help', 'version', 'verbose', 'force', 'keep-lines',
             'no-substitute', 'content-types-path='])
    except getopt.GetoptError as msg:
        sys.stderr.write("gmxpreprocess: error: %s. Your invocation was: %s\n"\
                         % (msg, argv))
        sys.stderr.write("See 'gmxpreprocess --help'.\n")
        return 1
    outfile = sys.stdout
    defines = {}
    force = 0
    keepLines = 0
    substitute = 1
    includePath = []
    contentTypesPaths = []
    for opt, optarg in optlist:
        if opt in ('-h', '--help'):
            sys.stdout.write(__doc__)
            return 0
        elif opt in ('-V', '--version'):
            sys.stdout.write("gmxpreprocess %s\n" % __version__)
            return 0
        elif opt in ('-v', '--verbose'):
            log.setLevel(log.DEBUG)
        elif opt == '-o':
            outfile = optarg
        elif opt in ('-f', '--force'):
            force = 1
        elif opt == '-D':
            if optarg.find('=') != -1:
                var, val = optarg.split('=', 1)
                # Integer values are converted; anything else stays a string.
                try:
                    val = int(val)
                except ValueError:
                    pass
            else:
                var, val = optarg, None
            defines[var] = val
        elif opt in ('-k', '--keep-lines'):
            keepLines = 1
        elif opt == '-I':
            includePath.append(optarg)
        elif opt in ('-s', '--no-substitute'):
            substitute = 0
        elif opt in ('-c', '--content-types-path'):
            contentTypesPaths.append(optarg)

    if len(args) != 1:
        sys.stderr.write("gmxpreprocess: error: incorrect number of "\
                         "arguments: argv=%r\n" % argv)
        return 1
    else:
        infile = args[0]

    try:
        contentTypesRegistry = ContentTypesRegistry(contentTypesPaths)
        gmxpreprocess(infile, outfile, defines, force, keepLines, includePath,
                      substitute, contentTypesRegistry=contentTypesRegistry)
    except PreprocessError as ex:
        if log.isDebugEnabled():
            import traceback
            traceback.print_exc(file=sys.stderr)
        else:
            sys.stderr.write("gmxpreprocess: error: %s\n" % str(ex))
        return 1
    return 0


if __name__ == "__main__":
    # ContentTypesRegistry looks for "content.types" next to __file__.
    __file__ = sys.argv[0]
    sys.exit(main(sys.argv))