2 # $Id: FileUtils.py,v 1.12 2005/11/02 22:26:07 tavis_rudd Exp $
3 """File utilities for Python:
6 ================================================================================
7 Author: Tavis Rudd <tavis@damnsimple.com>
8 License: This software is released for unlimited distribution under the
9 terms of the MIT license. See the LICENSE file.
10 Version: $Revision: 1.12 $
11 Start Date: 2001/09/26
12 Last Revision Date: $Date: 2005/11/02 22:26:07 $
14 __author__
= "Tavis Rudd <tavis@damnsimple.com>"
15 __revision__
= "$Revision: 1.12 $"[11:-2]
20 from os
import listdir
23 from types
import StringType
24 from tempfile
import mktemp
def _escapeRegexChars(txt,
                      escapeRE=re.compile(r'([\$\^\*\+\.\?\{\}\[\]\(\)\|\\])')):
    """Return 'txt' with its regex metacharacters backslash-escaped.

    Every character captured by group 1 of 'escapeRE' (by default one of
    $ ^ * + . ? { } [ ] ( ) | and the backslash itself) is written back out
    with a single backslash in front of it.  The default 'escapeRE' is
    compiled once, when the function is defined, and shared by all calls."""
    # Replacement template: one literal backslash followed by the matched char.
    backslashThenMatch = r'\\\1'
    escapedTxt = escapeRE.sub(backslashThenMatch, txt)
    return escapedTxt
def findFiles(*args, **kw):
    """Return the files found by a FileFinder built from the given arguments.

    All positional and keyword arguments are forwarded untouched to the
    FileFinder constructor; the value returned is whatever its files()
    method returns.  See the FileFinder docstring for the accepted
    arguments."""
    finder = FileFinder(*args, **kw)
    return finder.files()
def replaceStrInFiles(files, theStr, repl):
    """Replace all instances of 'theStr' with 'repl' for each file in the
    'files' list.  Returns the value of FindAndReplace.results(), a
    dictionary with data about the matches found.

    This is like string.replace() on a multi-file basis: 'theStr' is
    regex-escaped so it is matched literally, and a non-callable 'repl' is
    inserted verbatim (backslash sequences such as '\\1' or '\\g<name>' in it
    are NOT treated as group references).  A callable 'repl' is passed through
    unchanged and is called with each match object.

    This function is a wrapper around the FindAndReplace class. See its
    docstring for more details."""

    pattern = _escapeRegexChars(theStr)
    if callable(repl):
        subber = repl
    else:
        # FindAndReplace compiles a replacement *string* into a function that
        # expands backrefs.  The escaped pattern contains no groups, so any
        # backref-looking text in 'repl' would fail instead of being written
        # out literally.  Handing over a constant-returning callable keeps
        # the replacement literal.
        def subber(match, literal=repl):
            return literal
    return FindAndReplace(files, pattern, subber).results()
def replaceRegexInFiles(files, pattern, repl):
    """Apply a regex substitution to every file in the 'files' list.

    'pattern' and 'repl' are handed straight to FindAndReplace, making this
    the multi-file counterpart of re.sub.  The return value is whatever
    FindAndReplace.results() returns (a dictionary with data about the
    matches found).  See the FindAndReplace docstring for more details."""
    replacer = FindAndReplace(files, pattern, repl)
    return replacer.results()
64 ##################################################
69 """Traverses a directory tree and finds all files in it that match one of
70 the specified glob patterns."""
72 def __init__(self
, rootPath
,
74 ignoreBasenames
=('CVS','.svn'),
78 self
._rootPath
= rootPath
79 self
._globPatterns
= globPatterns
80 self
._ignoreBasenames
= ignoreBasenames
81 self
._ignoreDirs
= ignoreDirs
84 self
.walkDirTree(rootPath
)
86 def walkDirTree(self
, dir='.',
93 """Recursively walk through a directory tree and find matching files."""
94 processDir
= self
.processDir
95 filterDir
= self
.filterDir
98 addDir
= pendingDirs
.append
99 getDir
= pendingDirs
.pop
107 for baseName
in listdir(dir):
108 fullPath
= join(dir, baseName
)
110 if filterDir(baseName
, fullPath
):
def filterDir(self, baseName, fullPath):
    """Hook deciding whether a directory should be kept.

    Returns True only when 'baseName' is absent from self._ignoreBasenames
    and 'fullPath' is absent from self._ignoreDirs; False otherwise."""
    nameAllowed = baseName not in self._ignoreBasenames
    return nameAllowed and fullPath not in self._ignoreDirs
def processDir(self, dir, glob=glob):
    """Collect the entries of 'dir' that match any of self._globPatterns.

    Each pattern is joined onto 'dir' with os.path.join and expanded with
    'glob'; every resulting path is appended to self._files, pattern by
    pattern.  Nothing is returned."""
    collected = self._files
    for globPattern in self._globPatterns:
        matches = glob(os.path.join(dir, globPattern))
        collected.extend(matches)
128 class _GenSubberFunc
:
130 """Converts a 'sub' string in the form that one feeds to re.sub (backrefs,
131 groups, etc.) into a function that can be used to do the substitutions in
132 the FindAndReplace class."""
134 backrefRE
= re
.compile(r
'\\([1-9][0-9]*)')
135 groupRE
= re
.compile(r
'\\g<([a-zA-Z_][a-zA-Z_]*)>')
137 def __init__(self
, replaceStr
):
138 self
._src
= replaceStr
140 self
._codeChunks
= []
149 def setPos(self
, pos
):
153 return self
._pos
>= len(self
._src
)
155 def advance(self
, offset
=1):
158 def readTo(self
, to
, start
=None):
163 return self
._src
[start
:]
165 return self
._src
[start
:to
]
167 ## match and get methods
def matchBackref(self):
    """Try to match a numeric backreference (a backslash followed by a
    number with no leading zero) at the current position.

    Applies self.backrefRE to self.src() starting at self.pos() and returns
    the resulting match object, or None when there is no match there."""
    source = self.src()
    position = self.pos()
    return self.backrefRE.match(source, position)
172 def getBackref(self
):
173 m
= self
.matchBackref()
def matchGroup(self):
    """Try to match a named-group reference of the form \\g<name> at the
    current position.

    Applies self.groupRE to self.src() starting at self.pos() and returns
    the resulting match object, or None when there is no match there."""
    source = self.src()
    position = self.pos()
    return self.groupRE.match(source, position)
181 m
= self
.matchGroup()
185 ## main parse loop and the eat methods
188 while not self
.atEnd():
189 if self
.matchBackref():
191 elif self
.matchGroup():
196 def eatStrConst(self
):
197 startPos
= self
.pos()
198 while not self
.atEnd():
199 if self
.matchBackref() or self
.matchGroup():
203 strConst
= self
.readTo(self
.pos(), start
=startPos
)
204 self
.addChunk(repr(strConst
))
def eatBackref(self):
    """Consume a backreference and record the code chunk that fetches it.

    The value returned by self.getBackref() is wrapped as a call on the
    match object 'm' and handed to self.addChunk()."""
    groupIndex = self.getBackref()
    groupCall = 'm.group(' + groupIndex + ')'
    self.addChunk(groupCall)
210 self
.addChunk( 'm.group("' + self
.getGroup() + '")' )
def addChunk(self, chunk):
    """Append 'chunk' to the end of self._codeChunks."""
    codeChunks = self._codeChunks
    codeChunks.append(chunk)
215 ## code wrapping methods
218 return ', '.join(self
._codeChunks
)
221 return "def subber(m):\n\treturn ''.join([%s])\n" % (self
.codeBody())
223 def subberFunc(self
):
228 class FindAndReplace
:
230 """Find and replace all instances of 'patternOrRE' with 'replacement' for
231 each file in the 'files' list. This is a multi-file version of re.sub().
233 'patternOrRE' can be a raw regex pattern or
234 a regex object as generated by the re module. 'replacement' can be any
235 string that would work with patternOrRE.sub(replacement, fileContents).
238 def __init__(self
, files
, patternOrRE
, replacement
,
242 if type(patternOrRE
) == StringType
:
243 self
._regex
= re
.compile(patternOrRE
)
245 self
._regex
= patternOrRE
246 if type(replacement
) == StringType
:
247 self
._subber
= _GenSubberFunc(replacement
).subberFunc()
249 self
._subber
= replacement
251 self
._pattern
= pattern
= self
._regex
.pattern
254 self
._recordResults
= recordResults
256 ## see if we should use pgrep to do the file matching
257 self
._usePgrep
= False
258 if (os
.popen3('pgrep')[2].read()).startswith('Usage:'):
259 ## now check to make sure pgrep understands the pattern
261 open(tmpFile
, 'w').write('#')
262 if not (os
.popen3('pgrep "' + pattern
+ '" ' + tmpFile
)[2].read()):
263 # it didn't print an error msg so we're ok
264 self
._usePgrep
= True
274 subber
= self
._subDispatcher
275 usePgrep
= self
._usePgrep
276 pattern
= self
._pattern
277 for file in self
._files
:
278 if not os
.path
.isfile(file):
279 continue # skip dirs etc.
281 self
._currFile
= file
283 if locals().has_key('orig'):
286 if os
.popen('pgrep "' + pattern
+ '" ' + file ).read():
289 orig
= open(file).read()
290 if regex
.search(orig
):
293 if not locals().has_key('orig'):
294 orig
= open(file).read()
295 new
= regex
.sub(subber
, orig
)
296 open(file, 'w').write(new
)
298 def _subDispatcher(self
, match
):
299 if self
._recordResults
:
300 if not self
._results
.has_key(self
._currFile
):
301 res
= self
._results
[self
._currFile
] = {}
305 res
= self
._results
[self
._currFile
]
307 res
['matches'].append({'contents':match
.group(),
308 'start':match
.start(),
312 return self
._subber
(match
)
315 class SourceFileStats
:
322 def __init__(self
, files
):
323 self
._fileStats
= stats
= {}
325 stats
[file] = self
.getFileStats(file)
328 return self
._fileStats
335 for fileStats
in self
.rawStats().values():
336 codeLines
+= fileStats
['codeLines']
337 blankLines
+= fileStats
['blankLines']
338 commentLines
+= fileStats
['commentLines']
339 totalLines
+= fileStats
['totalLines']
341 stats
= {'codeLines':codeLines
,
342 'blankLines':blankLines
,
343 'commentLines':commentLines
,
344 'totalLines':totalLines
,
348 def printStats(self
):
351 def getFileStats(self
, fileName
):
355 commentLineRe
= re
.compile(r
'\s#.*$')
356 blankLineRe
= re
.compile('\s$')
357 lines
= open(fileName
).read().splitlines()
358 totalLines
= len(lines
)
361 if commentLineRe
.match(line
):
363 elif blankLineRe
.match(line
):
368 stats
= {'codeLines':codeLines
,
369 'blankLines':blankLines
,
370 'commentLines':commentLines
,
371 'totalLines':totalLines
,