Merge branch 'master' into subfolders-8.3
[pyTivo/krkeegan.git] / Cheetah / FileUtils.py
blobc3749f500a119a12440dc05b4760630f218685f1
1 #!/usr/bin/env python
2 # $Id: FileUtils.py,v 1.12 2005/11/02 22:26:07 tavis_rudd Exp $
3 """File utilities for Python:
5 Meta-Data
6 ================================================================================
7 Author: Tavis Rudd <tavis@damnsimple.com>
8 License: This software is released for unlimited distribution under the
9 terms of the MIT license. See the LICENSE file.
10 Version: $Revision: 1.12 $
11 Start Date: 2001/09/26
12 Last Revision Date: $Date: 2005/11/02 22:26:07 $
13 """
14 __author__ = "Tavis Rudd <tavis@damnsimple.com>"
15 __revision__ = "$Revision: 1.12 $"[11:-2]
18 from glob import glob
19 import os
20 from os import listdir
21 import os.path
22 import re
23 from types import StringType
24 from tempfile import mktemp
def _escapeRegexChars(txt,
                      escapeRE=re.compile(r'([\$\^\*\+\.\?\{\}\[\]\(\)\|\\])')):

    """Return 'txt' with a backslash inserted in front of every character
    matched by 'escapeRE'.

    By default 'escapeRE' matches the regex metacharacters
    $ ^ * + . ? { } [ ] ( ) | and the backslash itself, so the result can be
    used as a regex pattern that matches 'txt' literally."""

    def addBackslash(match):
        # group 1 is the single special character that was found
        return '\\' + match.group(1)

    return escapeRE.sub(addBackslash, txt)
def findFiles(*args, **kw):

    """Recursively find all the files matching a glob pattern.

    Convenience front end for the FileFinder class: every positional and
    keyword argument is handed to FileFinder unchanged, and the list returned
    by its files() method is passed back.  See the FileFinder docstring for
    the accepted arguments."""

    finder = FileFinder(*args, **kw)
    return finder.files()
def replaceStrInFiles(files, theStr, repl):

    """Replace every literal occurrence of 'theStr' with 'repl' in each file
    named in the 'files' list, and return the results dictionary describing
    the matches found.

    This is like string.replace() on a multi-file basis.

    'theStr' is regex-escaped first so that it is matched literally; the
    actual work is then done by the FindAndReplace class (see its docstring
    for more details).  'repl' is handed to FindAndReplace as is."""

    literalPattern = _escapeRegexChars(theStr)
    replacer = FindAndReplace(files, literalPattern, repl)
    return replacer.results()
def replaceRegexInFiles(files, pattern, repl):

    """Replace every match of the regex 'pattern' with 'repl' in each file
    named in the 'files' list, and return the results dictionary describing
    the matches found.

    This is like re.sub on a multi-file basis.

    The work is delegated to the FindAndReplace class; see its docstring for
    more details."""

    replacer = FindAndReplace(files, pattern, repl)
    return replacer.results()
64 ##################################################
65 ## CLASSES
class FileFinder:

    """Collects every path below a root directory that matches at least one
    of the given glob patterns.

    The whole tree is scanned while the instance is being constructed; call
    files() afterwards to obtain the accumulated list of matches."""

    def __init__(self, rootPath,
                 globPatterns=('*',),
                 ignoreBasenames=('CVS','.svn'),
                 ignoreDirs=(),
                 ):

        """Store the search settings and scan 'rootPath' straight away.

        rootPath        -- directory at which the traversal starts
        globPatterns    -- glob patterns evaluated inside every visited dir
        ignoreBasenames -- sub-directory names that are never descended into
        ignoreDirs      -- sub-directory paths (as built by joining the parent
                           path and the entry name) that are never descended
                           into
        """

        self._files = []
        self._ignoreDirs = ignoreDirs
        self._ignoreBasenames = ignoreBasenames
        self._globPatterns = globPatterns
        self._rootPath = rootPath

        self.walkDirTree(rootPath)

    def walkDirTree(self, dir='.',

                    listdir=os.listdir,
                    isdir=os.path.isdir,
                    join=os.path.join,
                    ):

        """Recursively walk through a directory tree and find matching files.

        Directories waiting to be visited are kept on a stack: the most
        recently discovered sub-directory is handled next.  Each visited
        directory is first passed to processDir(), then its sub-directories
        that pass filterDir() are queued."""

        stack = [dir]
        while stack:
            current = stack.pop()

            ## collect the matches in this directory
            self.processDir(current)

            ## then queue up the sub-directories that are not filtered out
            for entry in listdir(current):
                candidate = join(current, entry)
                if isdir(candidate) and self.filterDir(entry, candidate):
                    stack.append(candidate)

    def filterDir(self, baseName, fullPath):

        """A hook for filtering out certain dirs.

        Returns True when the directory should be descended into, i.e. when
        neither its base name nor its full path is on an ignore list."""

        return (baseName not in self._ignoreBasenames
                and fullPath not in self._ignoreDirs)

    def processDir(self, dir, glob=glob):

        """Add the glob matches of every pattern, evaluated inside 'dir', to
        the list of results."""

        for pattern in self._globPatterns:
            matches = glob(os.path.join(dir, pattern))
            self._files.extend(matches)

    def files(self):

        """Return the list of matching paths gathered so far."""

        return self._files
class _GenSubberFunc:

    r"""Converts a 'sub' string in the form that one feeds to re.sub (backrefs,
    groups, etc.) into a function that can be used to do the substitutions in
    the FindAndReplace class.

    The replacement string is parsed into three kinds of chunks:

      * numeric backrefs:   \1, \2, ...
      * group references:   \g<name> or \g<number>
      * literal text:       everything else, copied through verbatim (no
                            other backslash escapes are interpreted)

    Each chunk is turned into a Python expression; subberFunc() compiles
    those expressions into a function that takes a match object and returns
    the replacement text."""

    backrefRE = re.compile(r'\\([1-9][0-9]*)')
    # group names may contain digits after the first character; a purely
    # numeric reference (\g<2>) is accepted as well
    groupRE = re.compile(r'\\g<([a-zA-Z_][a-zA-Z0-9_]*|[0-9]+)>')

    def __init__(self, replaceStr):
        """Parse 'replaceStr' immediately; the results are kept as a list of
        source-code chunks."""
        self._src = replaceStr
        self._pos = 0
        self._codeChunks = []
        self.parse()

    def src(self):
        """Return the replacement string being parsed."""
        return self._src

    def pos(self):
        """Return the current parse position (an index into src())."""
        return self._pos

    def setPos(self, pos):
        """Move the parse position to 'pos'."""
        self._pos = pos

    def atEnd(self):
        """Return True once the parse position is at or past the end."""
        return self._pos >= len(self._src)

    def advance(self, offset=1):
        """Move the parse position forward by 'offset' characters."""
        self._pos += offset

    def readTo(self, to, start=None):
        """Move the parse position to 'to' and return the text between
        'start' (default: the position before the move) and 'to'."""
        if start is None:
            start = self._pos
        self._pos = to
        if self.atEnd():
            return self._src[start:]
        else:
            return self._src[start:to]

    ## match and get methods

    def matchBackref(self):
        """Return a match object if a numeric backref starts at the current
        position, else None."""
        return self.backrefRE.match(self.src(), self.pos())

    def getBackref(self):
        """Consume the backref at the current position and return its number
        as a string.  Only call this after matchBackref() succeeded."""
        m = self.matchBackref()
        self.setPos(m.end())
        return m.group(1)

    def matchGroup(self):
        """Return a match object if a \\g<...> reference starts at the current
        position, else None."""
        return self.groupRE.match(self.src(), self.pos())

    def getGroup(self):
        """Consume the \\g<...> reference at the current position and return
        the name or number between the angle brackets.  Only call this after
        matchGroup() succeeded."""
        m = self.matchGroup()
        self.setPos(m.end())
        return m.group(1)

    ## main parse loop and the eat methods

    def parse(self):
        """Split the whole replacement string into code chunks."""
        while not self.atEnd():
            if self.matchBackref():
                self.eatBackref()
            elif self.matchGroup():
                self.eatGroup()
            else:
                self.eatStrConst()

    def eatStrConst(self):
        """Consume literal text up to the next backref/group reference (or the
        end of the string) and add it as a quoted string chunk."""
        startPos = self.pos()
        while not self.atEnd():
            if self.matchBackref() or self.matchGroup():
                break
            else:
                self.advance()
        strConst = self.readTo(self.pos(), start=startPos)
        # repr() yields a safely quoted Python literal for the generated code
        self.addChunk(repr(strConst))

    def eatBackref(self):
        """Consume a numeric backref and add the matching m.group(N) chunk."""
        self.addChunk( 'm.group(' + self.getBackref() + ')' )

    def eatGroup(self):
        """Consume a \\g<...> reference and add the matching m.group() chunk."""
        ref = self.getGroup()
        if ref.isdigit():
            # numeric form: go through int() so that leading zeros never
            # end up in the generated source as an octal/invalid literal
            self.addChunk( 'm.group(' + str(int(ref)) + ')' )
        else:
            self.addChunk( 'm.group("' + ref + '")' )

    def addChunk(self, chunk):
        """Append one source-code expression to the chunk list."""
        self._codeChunks.append(chunk)

    ## code wrapping methods

    def codeBody(self):
        """Return all chunks joined into a comma separated expression list."""
        return ', '.join(self._codeChunks)

    def code(self):
        """Return the source code of the generated 'subber' function."""
        return "def subber(m):\n\treturn ''.join([%s])\n" % (self.codeBody())

    def subberFunc(self):
        """Compile the generated source and return the 'subber' function.

        The code is executed in a private namespace; the call form of exec
        with an explicit dictionary is valid on both Python 2 and Python 3.
        The source only contains repr()-quoted literals and group references
        validated by backrefRE/groupRE."""
        namespace = {}
        exec(self.code(), namespace)
        return namespace['subber']
class FindAndReplace:

    """Find and replace all instances of 'patternOrRE' with 'replacement' for
    each file in the 'files' list. This is a multi-file version of re.sub().

    'patternOrRE' can be a raw regex pattern or
    a regex object as generated by the re module. 'replacement' can be any
    string that would work with patternOrRE.sub(replacement, fileContents).
    A 'replacement' that is not a string is used as is: it is called with
    each match object and must return the replacement text.

    All the work happens in the constructor; entries of 'files' that are not
    regular files are skipped, and files containing a match are rewritten in
    place.  When 'recordResults' is true, results() returns a dictionary that
    maps each modified file name to {'count': N, 'matches': [...]}, where
    every match is described by its 'contents', 'start' and 'end'.
    """

    # string types for which 'patternOrRE' is compiled / 'replacement' is
    # converted; 'basestring' only exists on Python 2
    try:
        _stringTypes = (basestring,)
    except NameError:
        _stringTypes = (str,)

    def __init__(self, files, patternOrRE, replacement,
                 recordResults=True):

        if isinstance(patternOrRE, self._stringTypes):
            self._regex = re.compile(patternOrRE)
        else:
            self._regex = patternOrRE
        if isinstance(replacement, self._stringTypes):
            self._subber = _GenSubberFunc(replacement).subberFunc()
        else:
            self._subber = replacement

        self._pattern = pattern = self._regex.pattern
        self._files = files
        self._results = {}
        self._recordResults = recordResults

        ## see if we should use pgrep to do the file matching
        self._usePgrep = self._pgrepUnderstands(pattern)

        self._run()

    def results(self):
        """Return the dictionary of recorded matches (empty when nothing
        matched or when recording is switched off)."""
        return self._results

    def _callPgrep(self, args):
        """Run the external 'pgrep' program with the argument list 'args' and
        return its (stdout, stderr) output, or None if it cannot be started.

        The arguments are passed as a list without going through a shell, so
        quotes, '$', backticks or spaces in patterns and file names reach the
        program unchanged."""
        import subprocess
        try:
            proc = subprocess.Popen(['pgrep'] + list(args),
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
        except OSError:
            return None
        return proc.communicate()

    def _pgrepUnderstands(self, pattern):
        """Return True if a 'pgrep' program is available that prints a usage
        message when run without arguments and accepts 'pattern' without
        complaining on stderr.

        NOTE(review): presumably this targets the PCRE grep tool that used to
        be installed under the name 'pgrep' -- confirm; a different program
        of that name is expected to fail one of the two probes."""
        probe = self._callPgrep([])
        if probe is None or not probe[1].startswith('Usage:'.encode('ascii')):
            return False

        ## now check to make sure pgrep understands the pattern
        import tempfile
        fd, tmpFile = tempfile.mkstemp()  # created atomically, unlike mktemp()
        try:
            handle = os.fdopen(fd, 'w')
            try:
                handle.write('#')
            finally:
                handle.close()
            outcome = self._callPgrep([pattern, tmpFile])
        finally:
            os.remove(tmpFile)
        # no error message means the pattern was accepted
        return outcome is not None and not outcome[1]

    def _readFile(self, fileName):
        """Return the contents of 'fileName', closing the file afterwards."""
        handle = open(fileName)
        try:
            return handle.read()
        finally:
            handle.close()

    def _writeFile(self, fileName, contents):
        """Overwrite 'fileName' with 'contents', closing the file afterwards."""
        handle = open(fileName, 'w')
        try:
            handle.write(contents)
        finally:
            handle.close()

    def _run(self):
        """Process every entry of the file list: detect whether it contains a
        match and, if so, rewrite it with the substitutions applied."""
        regex = self._regex
        for fileName in self._files:
            if not os.path.isfile(fileName):
                continue # skip dirs etc.

            self._currFile = fileName
            orig = None
            if self._usePgrep:
                # any output from pgrep means at least one line matched
                output = self._callPgrep([self._pattern, fileName])
                found = bool(output and output[0])
            else:
                orig = self._readFile(fileName)
                found = regex.search(orig) is not None

            if found:
                if orig is None:
                    orig = self._readFile(fileName)
                new = regex.sub(self._subDispatcher, orig)
                self._writeFile(fileName, new)

    def _subDispatcher(self, match):
        """Substitution callback handed to regex.sub(): record the match for
        the file currently being processed (if recording is on) and return
        the replacement text produced by the real subber."""
        if self._recordResults:
            res = self._results.get(self._currFile)
            if res is None:
                res = self._results[self._currFile] = {'count': 0,
                                                       'matches': []}
            res['count'] += 1
            res['matches'].append({'contents':match.group(),
                                   'start':match.start(),
                                   'end':match.end(),
                                   })
        return self._subber(match)
class SourceFileStats:

    """Line-count statistics for a list of source files.

    Every file is read when the instance is constructed.  Each line is put
    into exactly one category: a comment line (first non-whitespace character
    is '#'), a blank line (empty or whitespace only) or a code line
    (everything else, including code followed by a trailing comment).
    """

    _fileStats = None

    def __init__(self, files):
        """Compute and store the statistics of every file name in 'files'."""
        self._fileStats = stats = {}
        for fileName in files:
            stats[fileName] = self.getFileStats(fileName)

    def rawStats(self):
        """Return the dictionary that maps each file name to its statistics
        dictionary (see getFileStats)."""
        return self._fileStats

    def summary(self):
        """Return one statistics dictionary holding the totals over all files;
        every count is 0 when there are no files."""
        totals = {'codeLines':0,
                  'blankLines':0,
                  'commentLines':0,
                  'totalLines':0,
                  }
        for fileStats in self.rawStats().values():
            for key in totals:
                totals[key] += fileStats[key]
        return totals

    def printStats(self):
        """Placeholder; currently does nothing."""
        pass

    def getFileStats(self, fileName):
        """Read 'fileName' and return a dictionary with the keys 'codeLines',
        'blankLines', 'commentLines' and 'totalLines'."""
        codeLines = 0
        blankLines = 0
        commentLines = 0
        # '\s*' (not a single '\s') so that unindented comments and truly
        # empty lines are classified correctly instead of counting as code
        commentLineRe = re.compile(r'\s*#.*$')
        blankLineRe = re.compile(r'\s*$')

        handle = open(fileName)
        try:
            lines = handle.read().splitlines()
        finally:
            handle.close()
        totalLines = len(lines)

        for line in lines:
            if commentLineRe.match(line):
                commentLines += 1
            elif blankLineRe.match(line):
                blankLines += 1
            else:
                codeLines += 1

        stats = {'codeLines':codeLines,
                 'blankLines':blankLines,
                 'commentLines':commentLines,
                 'totalLines':totalLines,
                 }

        return stats