Cheetah/FileUtils.py

   1 #!/usr/bin/env python
   2 # $Id: FileUtils.py,v 1.12 2005/11/02 22:26:07 tavis_rudd Exp $
   3 """File utilities for Python:
   4
   5 Meta-Data
   6 ================================================================================
   7 Author: Tavis Rudd <tavis@damnsimple.com>
   8 License: This software is released for unlimited distribution under the
   9          terms of the MIT license.  See the LICENSE file.
  10 Version: $Revision: 1.12 $
  11 Start Date: 2001/09/26
  12 Last Revision Date: $Date: 2005/11/02 22:26:07 $
  13 """
  14 __author__ = "Tavis Rudd <tavis@damnsimple.com>"
  15 __revision__ = "$Revision: 1.12 $"[11:-2]
  16
  17
  18 from glob import glob
  19 import os
  20 from os import listdir
  21 import os.path
  22 import re
  23 from types import StringType
  24 from tempfile import mktemp
  25
  26 def _escapeRegexChars(txt,
  27                      escapeRE=re.compile(r'([\$\^\*\+\.\?\{\}\[\]\(\)\|\\])')):
  28     return escapeRE.sub(r'\\\1' , txt)
  29
  30 def findFiles(*args, **kw):
  31     """Recursively find all the files matching a glob pattern.
  32
  33     This function is a wrapper around the FileFinder class.  See its docstring
  34     for details about the accepted arguments, etc."""
  35
  36     return FileFinder(*args, **kw).files()
  37
  38 def replaceStrInFiles(files, theStr, repl):
  39
  40     """Replace all instances of 'theStr' with 'repl' for each file in the 'files'
  41     list. Returns a dictionary with data about the matches found.
  42
  43     This is like string.replace() on a multi-file basis.
  44
  45     This function is a wrapper around the FindAndReplace class. See its
  46     docstring for more details."""
  47
  48     pattern = _escapeRegexChars(theStr)
  49     return FindAndReplace(files, pattern, repl).results()
  50
  51 def replaceRegexInFiles(files, pattern, repl):
  52
  53     """Replace all instances of regex 'pattern' with 'repl' for each file in the
  54     'files' list. Returns a dictionary with data about the matches found.
  55
  56     This is like re.sub on a multi-file basis.
  57
  58     This function is a wrapper around the FindAndReplace class. See its
  59     docstring for more details."""
  60
  61     return FindAndReplace(files, pattern, repl).results()
  62
  63
  64 ##################################################
  65 ## CLASSES
  66
  67 class FileFinder:
  68
  69     """Traverses a directory tree and finds all files in it that match one of
  70     the specified glob patterns."""
  71
  72     def __init__(self, rootPath,
  73                  globPatterns=('*',),
  74                  ignoreBasenames=('CVS','.svn'),
  75                  ignoreDirs=(),
  76                  ):
  77
  78         self._rootPath = rootPath
  79         self._globPatterns = globPatterns
  80         self._ignoreBasenames = ignoreBasenames
  81         self._ignoreDirs = ignoreDirs
  82         self._files = []
  83
  84         self.walkDirTree(rootPath)
  85
  86     def walkDirTree(self, dir='.',
  87
  88                     listdir=os.listdir,
  89                     isdir=os.path.isdir,
  90                     join=os.path.join,
  91                     ):
  92
  93         """Recursively walk through a directory tree and find matching files."""
  94         processDir = self.processDir
  95         filterDir = self.filterDir
  96
  97         pendingDirs = [dir]
  98         addDir = pendingDirs.append
  99         getDir = pendingDirs.pop
 100
 101         while pendingDirs:
 102             dir = getDir()
 103             ##  process this dir
 104             processDir(dir)
 105
 106             ## and add sub-dirs
 107             for baseName in listdir(dir):
 108                 fullPath = join(dir, baseName)
 109                 if isdir(fullPath):
 110                     if filterDir(baseName, fullPath):
 111                         addDir( fullPath )
 112
 113     def filterDir(self, baseName, fullPath):
 114
 115         """A hook for filtering out certain dirs. """
 116
 117         return not (baseName in self._ignoreBasenames or
 118                     fullPath in self._ignoreDirs)
 119
 120     def processDir(self, dir, glob=glob):
 121         extend = self._files.extend
 122         for pattern in self._globPatterns:
 123             extend( glob(os.path.join(dir, pattern)) )
 124
 125     def files(self):
 126         return self._files
 127
 128 class _GenSubberFunc:
 129
 130     """Converts a 'sub' string in the form that one feeds to re.sub (backrefs,
 131     groups, etc.) into a function that can be used to do the substitutions in
 132     the FindAndReplace class."""
 133
 134     backrefRE = re.compile(r'\\([1-9][0-9]*)')
 135     groupRE = re.compile(r'\\g<([a-zA-Z_][a-zA-Z_]*)>')
 136
 137     def __init__(self, replaceStr):
 138         self._src = replaceStr
 139         self._pos = 0
 140         self._codeChunks = []
 141         self.parse()
 142
 143     def src(self):
 144         return self._src
 145
 146     def pos(self):
 147         return self._pos
 148
 149     def setPos(self, pos):
 150         self._pos = pos
 151
 152     def atEnd(self):
 153         return self._pos >= len(self._src)
 154
 155     def advance(self, offset=1):
 156         self._pos += offset
 157
 158     def readTo(self, to, start=None):
 159         if start == None:
 160             start = self._pos
 161         self._pos = to
 162         if self.atEnd():
 163             return self._src[start:]
 164         else:
 165             return self._src[start:to]
 166
 167     ## match and get methods
 168
 169     def matchBackref(self):
 170         return self.backrefRE.match(self.src(), self.pos())
 171
 172     def getBackref(self):
 173         m = self.matchBackref()
 174         self.setPos(m.end())
 175         return m.group(1)
 176
 177     def matchGroup(self):
 178         return self.groupRE.match(self.src(), self.pos())
 179
 180     def getGroup(self):
 181         m = self.matchGroup()
 182         self.setPos(m.end())
 183         return m.group(1)
 184
 185     ## main parse loop and the eat methods
 186
 187     def parse(self):
 188         while not self.atEnd():
 189             if self.matchBackref():
 190                 self.eatBackref()
 191             elif self.matchGroup():
 192                 self.eatGroup()
 193             else:
 194                 self.eatStrConst()
 195
 196     def eatStrConst(self):
 197         startPos = self.pos()
 198         while not self.atEnd():
 199             if self.matchBackref() or self.matchGroup():
 200                 break
 201             else:
 202                 self.advance()
 203         strConst = self.readTo(self.pos(), start=startPos)
 204         self.addChunk(repr(strConst))
 205
 206     def eatBackref(self):
 207         self.addChunk( 'm.group(' + self.getBackref() + ')' )
 208
 209     def eatGroup(self):
 210         self.addChunk( 'm.group("' + self.getGroup() + '")' )
 211
 212     def addChunk(self, chunk):
 213         self._codeChunks.append(chunk)
 214
 215     ## code wrapping methods
 216
 217     def codeBody(self):
 218         return ', '.join(self._codeChunks)
 219
 220     def code(self):
 221         return "def subber(m):\n\treturn ''.join([%s])\n" % (self.codeBody())
 222
 223     def subberFunc(self):
 224         exec self.code()
 225         return subber
 226
 227
 228 class FindAndReplace:
 229
 230     """Find and replace all instances of 'patternOrRE' with 'replacement' for
 231     each file in the 'files' list. This is a multi-file version of re.sub().
 232
 233     'patternOrRE' can be a raw regex pattern or
 234     a regex object as generated by the re module. 'replacement' can be any
 235     string that would work with patternOrRE.sub(replacement, fileContents).
 236     """
 237
 238     def __init__(self, files, patternOrRE, replacement,
 239                  recordResults=True):
 240
 241
 242         if type(patternOrRE) == StringType:
 243             self._regex = re.compile(patternOrRE)
 244         else:
 245             self._regex = patternOrRE
 246         if type(replacement) == StringType:
 247             self._subber = _GenSubberFunc(replacement).subberFunc()
 248         else:
 249             self._subber = replacement
 250
 251         self._pattern = pattern = self._regex.pattern
 252         self._files = files
 253         self._results = {}
 254         self._recordResults = recordResults
 255
 256         ## see if we should use pgrep to do the file matching
 257         self._usePgrep = False
 258         if (os.popen3('pgrep')[2].read()).startswith('Usage:'):
 259             ## now check to make sure pgrep understands the pattern
 260             tmpFile = mktemp()
 261             open(tmpFile, 'w').write('#')
 262             if not (os.popen3('pgrep "' + pattern + '" ' + tmpFile)[2].read()):
 263                 # it didn't print an error msg so we're ok
 264                 self._usePgrep = True
 265             os.remove(tmpFile)
 266
 267         self._run()
 268
 269     def results(self):
 270         return self._results
 271
 272     def _run(self):
 273         regex = self._regex
 274         subber = self._subDispatcher
 275         usePgrep = self._usePgrep
 276         pattern = self._pattern
 277         for file in self._files:
 278             if not os.path.isfile(file):
 279                 continue # skip dirs etc.
 280
 281             self._currFile = file
 282             found = False
 283             if locals().has_key('orig'):
 284                 del orig
 285             if self._usePgrep:
 286                 if os.popen('pgrep "' + pattern + '" ' + file ).read():
 287                     found = True
 288             else:
 289                 orig = open(file).read()
 290                 if regex.search(orig):
 291                     found = True
 292             if found:
 293                 if not locals().has_key('orig'):
 294                     orig = open(file).read()
 295                 new = regex.sub(subber, orig)
 296                 open(file, 'w').write(new)
 297
 298     def _subDispatcher(self, match):
 299         if self._recordResults:
 300             if not self._results.has_key(self._currFile):
 301                 res = self._results[self._currFile] = {}
 302                 res['count'] = 0
 303                 res['matches'] = []
 304             else:
 305                 res = self._results[self._currFile]
 306             res['count'] += 1
 307             res['matches'].append({'contents':match.group(),
 308                                    'start':match.start(),
 309                                    'end':match.end(),
 310                                    }
 311                                    )
 312         return self._subber(match)
 313
 314
 315 class SourceFileStats:
 316
 317     """
 318     """
 319
 320     _fileStats = None
 321
 322     def __init__(self, files):
 323         self._fileStats = stats = {}
 324         for file in files:
 325             stats[file] = self.getFileStats(file)
 326
 327     def rawStats(self):
 328         return self._fileStats
 329
 330     def summary(self):
 331         codeLines = 0
 332         blankLines = 0
 333         commentLines = 0
 334         totalLines = 0
 335         for fileStats in self.rawStats().values():
 336             codeLines += fileStats['codeLines']
 337             blankLines += fileStats['blankLines']
 338             commentLines += fileStats['commentLines']
 339             totalLines += fileStats['totalLines']
 340
 341         stats = {'codeLines':codeLines,
 342                  'blankLines':blankLines,
 343                  'commentLines':commentLines,
 344                  'totalLines':totalLines,
 345                  }
 346         return stats
 347
 348     def printStats(self):
 349         pass
 350
 351     def getFileStats(self, fileName):
 352         codeLines = 0
 353         blankLines = 0
 354         commentLines = 0
 355         commentLineRe = re.compile(r'\s#.*$')
 356         blankLineRe = re.compile('\s$')
 357         lines = open(fileName).read().splitlines()
 358         totalLines = len(lines)
 359
 360         for line in lines:
 361             if commentLineRe.match(line):
 362                 commentLines += 1
 363             elif blankLineRe.match(line):
 364                 blankLines += 1
 365             else:
 366                 codeLines += 1
 367
 368         stats = {'codeLines':codeLines,
 369                  'blankLines':blankLines,
 370                  'commentLines':commentLines,
 371                  'totalLines':totalLines,
 372                  }
 373
 374         return stats