1 # A parallelized "find(1)" using the thread module.
3 # This demonstrates the use of a work queue and worker threads.
4 # It really does do more stats/sec when using multiple threads,
5 # although the improvement is only about 20-30 percent.
6 # (That was 8 years ago. In 2002, on Linux, I can't measure any speedup.)
9 # I'm too lazy to write a command line parser for the full find(1)
10 # command line syntax, so the predicate it searches for is wired-in,
11 # see function selector() below. (It currently searches for files with
12 # world write permission.)
14 # Usage: parfind.py [-w nworkers] [directory] ...
15 # Default nworkers is 4
27 # Work queue class. Usage:
29 # wq.addwork(func, (arg1, arg2, ...)) # one or more calls
31 # The work is done when wq.run() completes.
32 # The function calls executed by the workers may add more work.
33 # Don't use keyboard interrupts!
39 # - busy and work are only modified when mutex is locked
40 # - len(work) is the number of jobs ready to be taken
41 # - busy is the number of jobs being done
42 # - todo is locked iff there is no work and somebody is busy
45 self
.mutex
= thread
.allocate()
46 self
.todo
= thread
.allocate()
51 def addwork(self
, func
, args
):
56 if len(self
.work
) == 1:
62 if self
.busy
== 0 and len(self
.work
) == 0:
68 self
.busy
= self
.busy
+ 1
70 if len(self
.work
) > 0:
76 self
.busy
= self
.busy
- 1
77 if self
.busy
== 0 and len(self
.work
) == 0:
82 time
.sleep(0.00001) # Let other threads run
91 def run(self
, nworkers
):
93 return # Nothing to do
94 for i
in range(nworkers
-1):
95 thread
.start_new(self
._worker
, ())
104 opts
, args
= getopt
.getopt(sys
.argv
[1:], '-w:')
105 for opt
, arg
in opts
:
107 nworkers
= string
.atoi(arg
)
113 wq
.addwork(find
, (dir, selector
, wq
))
119 sys
.stderr
.write('Total time %r sec.\n' % (t2
-t1
))
122 # The predicate -- defines what files we look for.
123 # Feel free to change this to suit your purpose
def selector(dir, name, fullname, stat):
    """Return True for world-writable entries that are not symbolic links.

    This is the wired-in search predicate.  Feel free to change it to
    suit your purpose, but keep the four-argument signature: find()
    invokes its predicate as pred(dir, name, fullname, stat).

    Arguments:
        dir      -- directory being scanned (unused by this predicate)
        name     -- entry name within dir (unused by this predicate)
        fullname -- joined path of the entry (unused by this predicate)
        stat     -- stat result for the entry, indexable with ST_MODE
                    (find() passes the value returned by os.lstat())

    Returns a bool.
    """
    mode = stat[ST_MODE]
    # 0o002 is the "write by others" permission bit.  The 0o prefix is
    # accepted by Python 2.6+ and Python 3; the legacy spelling 0002 is a
    # syntax error on Python 3.
    return (mode & 0o002) != 0 and not S_ISLNK(mode)
130 # The find procedure -- calls wq.addwork() for subdirectories
132 def find(dir, pred
, wq
):
134 names
= os
.listdir(dir)
135 except os
.error
, msg
:
136 print repr(dir), ':', msg
139 if name
not in (os
.curdir
, os
.pardir
):
140 fullname
= os
.path
.join(dir, name
)
142 stat
= os
.lstat(fullname
)
143 except os
.error
, msg
:
144 print repr(fullname
), ':', msg
146 if pred(dir, name
, fullname
, stat
):
148 if S_ISDIR(stat
[ST_MODE
]):
149 if not os
.path
.ismount(fullname
):
150 wq
.addwork(find
, (fullname
, pred
, wq
))
153 # Call the main program