Credit Nir Aides for r77288
[python.git] / Lib / multiprocessing / pool.py
blobb91b77d16d30c56bde2a83b5309e7f419cc13643
2 # Module providing the `Pool` class for managing a process pool
4 # multiprocessing/pool.py
6 # Copyright (c) 2007-2008, R Oudkerk --- see COPYING.txt
# Names exported by `from <this module> import *`; only `Pool` is listed.
__all__ = ['Pool']
12 # Imports
15 import threading
16 import Queue
17 import itertools
18 import collections
19 import time
21 from multiprocessing import Process, cpu_count, TimeoutError
22 from multiprocessing.util import Finalize, debug
#
# Constants representing the state of a pool
#
# RUN is 0 (falsy): the handler threads test `if thread._state:` to notice
# that the pool is no longer running, so the numeric values matter.

RUN, CLOSE, TERMINATE = 0, 1, 2

#
# Miscellaneous
#

# Module-wide source of job ids; result objects draw their `_job` from it.
job_counter = itertools.count(0)
def mapstar(args):
    '''
    Call `map()` with the elements of `args` as its positional arguments.

    `args` is expected to look like `(func, iterable, ...)`.  The pool uses
    this as the task function for chunked work, where each chunk is a
    `(func, items)` pair.
    '''
    positional = tuple(args)
    return map(*positional)
42 # Code run by worker processes
def worker(inqueue, outqueue, initializer=None, initargs=()):
    '''
    Main loop executed by every pool worker.

    Repeatedly reads a task `(job, i, func, args, kwds)` from `inqueue`,
    calls `func(*args, **kwds)` and puts `(job, i, (success, value))` on
    `outqueue`.  `value` is the return value when `success` is True and the
    raised exception instance otherwise.

    The loop ends when `None` is read from `inqueue` or when reading raises
    `EOFError`/`IOError`.

    inqueue     -- object with a blocking `get()` returning tasks
    outqueue    -- object with a `put()` accepting result tuples
    initializer -- optional callable run once before the first task
    initargs    -- positional arguments for `initializer`
    '''
    put = outqueue.put
    get = inqueue.get
    if hasattr(inqueue, '_writer'):
        # Queues exposing their connection ends: the worker only reads
        # tasks and only writes results, so close the ends it never uses.
        inqueue._writer.close()
        outqueue._reader.close()

    if initializer is not None:
        initializer(*initargs)

    while True:
        try:
            task = get()
        except (EOFError, IOError):
            debug('worker got EOFError or IOError -- exiting')
            break

        if task is None:
            debug('worker got sentinel -- exiting')
            break

        job, i, func, args, kwds = task
        try:
            result = (True, func(*args, **kwds))
        except Exception as e:
            result = (False, e)

        try:
            put((job, i, result))
        except Exception as e:
            # The result could not be sent (for instance it cannot be
            # serialised).  Report a failure for this task instead of
            # letting the worker die, which would leave the caller waiting
            # for a result that never arrives.  If this second put() also
            # fails (e.g. the queue itself is broken) the error propagates
            # exactly as it did before.
            debug('worker could not send result for job %s -- '
                  'reporting failure', job)
            put((job, i, (False, RuntimeError(
                'Error sending result: %r' % (e,)))))
74 # Class representing a process pool
class Pool(object):
    '''
    Class which supports an async version of the `apply()` builtin

    Work submitted through the public methods is placed on an internal task
    queue.  A task-handler thread forwards it to the workers through
    `_inqueue`; a result-handler thread reads `(job, i, result)` tuples from
    `_outqueue` and hands each one to the result object registered in
    `_cache` under `job`.
    '''
    Process = Process

    def __init__(self, processes=None, initializer=None, initargs=()):
        '''
        Start the workers and the two handler threads.

        processes   -- number of workers; when None, `cpu_count()` is used,
                       falling back to 1 if it raises NotImplementedError
        initializer -- optional callable passed to every worker together
                       with `initargs`
        initargs    -- argument tuple for `initializer`

        Raises ValueError if `processes` is less than 1 and TypeError if
        `initializer` is given but is not callable.
        '''
        # Validate everything before any queue, thread or process exists,
        # so that a rejected call leaves nothing behind.
        if processes is None:
            try:
                processes = cpu_count()
            except NotImplementedError:
                processes = 1
        if processes < 1:
            # A pool without workers can never complete a task, and
            # map_async() would divide by len(self._pool) * 4 == 0.
            raise ValueError('Number of processes must be at least 1')

        if initializer is not None and not hasattr(initializer, '__call__'):
            raise TypeError('initializer must be a callable')

        self._setup_queues()
        self._taskqueue = Queue.Queue()
        self._cache = {}
        self._state = RUN

        self._pool = []
        for i in range(processes):
            w = self.Process(
                target=worker,
                args=(self._inqueue, self._outqueue, initializer, initargs)
                )
            self._pool.append(w)
            w.name = w.name.replace('Process', 'PoolWorker')
            w.daemon = True
            w.start()

        self._task_handler = threading.Thread(
            target=Pool._handle_tasks,
            args=(self._taskqueue, self._quick_put, self._outqueue, self._pool)
            )
        self._task_handler.daemon = True
        # The handler threads carry their own copy of the pool state.
        self._task_handler._state = RUN
        self._task_handler.start()

        self._result_handler = threading.Thread(
            target=Pool._handle_results,
            args=(self._outqueue, self._quick_get, self._cache)
            )
        self._result_handler.daemon = True
        self._result_handler._state = RUN
        self._result_handler.start()

        # Calling self._terminate() runs _terminate_pool with these args.
        self._terminate = Finalize(
            self, self._terminate_pool,
            args=(self._taskqueue, self._inqueue, self._outqueue, self._pool,
                  self._task_handler, self._result_handler, self._cache),
            exitpriority=15
            )

    def _setup_queues(self):
        '''
        Create the task and result queues.

        `_quick_put`/`_quick_get` use the queues' underlying `_writer` and
        `_reader` ends directly rather than the queue methods.
        '''
        from .queues import SimpleQueue
        self._inqueue = SimpleQueue()
        self._outqueue = SimpleQueue()
        self._quick_put = self._inqueue._writer.send
        self._quick_get = self._outqueue._reader.recv

    def apply(self, func, args=(), kwds={}):
        '''
        Equivalent of `apply()` builtin
        '''
        assert self._state == RUN
        return self.apply_async(func, args, kwds).get()

    def map(self, func, iterable, chunksize=None):
        '''
        Equivalent of `map()` builtin
        '''
        assert self._state == RUN
        return self.map_async(func, iterable, chunksize).get()

    def imap(self, func, iterable, chunksize=1):
        '''
        Equivalent of `itertools.imap()` -- can be MUCH slower than `Pool.map()`
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapIterator(self._cache)
            # The task sequence is a generator, so `iterable` is consumed
            # by the task handler; set_length is called with the number of
            # tasks once all of them have been sent.
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            # Each result is a whole chunk; flatten the chunks back into items.
            return (item for chunk in result for item in chunk)

    def imap_unordered(self, func, iterable, chunksize=1):
        '''
        Like `imap()` method but ordering of results is arbitrary
        '''
        assert self._state == RUN
        if chunksize == 1:
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, func, (x,), {})
                         for i, x in enumerate(iterable)), result._set_length))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                     for i, x in enumerate(task_batches)), result._set_length))
            return (item for chunk in result for item in chunk)

    def apply_async(self, func, args=(), kwds={}, callback=None):
        '''
        Asynchronous equivalent of `apply()` builtin
        '''
        assert self._state == RUN
        result = ApplyResult(self._cache, callback)
        self._taskqueue.put(([(result._job, None, func, args, kwds)], None))
        return result

    def map_async(self, func, iterable, chunksize=None, callback=None):
        '''
        Asynchronous equivalent of `map()` builtin
        '''
        assert self._state == RUN
        if not hasattr(iterable, '__len__'):
            iterable = list(iterable)

        if chunksize is None:
            # Ceiling division: roughly four chunks per worker.
            chunksize, extra = divmod(len(iterable), len(self._pool) * 4)
            if extra:
                chunksize += 1
        if len(iterable) == 0:
            chunksize = 0

        task_batches = Pool._get_tasks(func, iterable, chunksize)
        result = MapResult(self._cache, chunksize, len(iterable), callback)
        self._taskqueue.put((((result._job, i, mapstar, (x,), {})
                              for i, x in enumerate(task_batches)), None))
        return result

    @staticmethod
    def _handle_tasks(taskqueue, put, outqueue, pool):
        '''
        Body of the task-handler thread.

        Takes `(taskseq, set_length)` pairs from `taskqueue` until `None`
        is read, sending every task in `taskseq` with `put`.  Afterwards it
        puts one `None` on `outqueue` and sends one `None` per entry of
        `pool` with `put`.
        '''
        thread = threading.current_thread()

        for taskseq, set_length in iter(taskqueue.get, None):
            i = -1
            for i, task in enumerate(taskseq):
                if thread._state:
                    debug('task handler found thread._state != RUN')
                    break
                try:
                    put(task)
                except IOError:
                    debug('could not put task on queue')
                    break
            else:
                # Every task of this sequence was sent.
                if set_length:
                    debug('doing set_length()')
                    set_length(i+1)
                continue
            # The inner loop was cut short: stop taking further work.
            break
        else:
            debug('task handler got sentinel')

        try:
            # tell result handler to finish when cache is empty
            debug('task handler sending sentinel to result handler')
            outqueue.put(None)

            # tell workers there is no more work
            debug('task handler sending sentinel to workers')
            for p in pool:
                put(None)
        except IOError:
            debug('task handler got IOError when sending sentinels')

        debug('task handler exiting')

    @staticmethod
    def _handle_results(outqueue, get, cache):
        '''
        Body of the result-handler thread.

        Reads `(job, i, obj)` tuples with `get` and passes them to
        `cache[job]._set(i, obj)`; results for jobs no longer in `cache`
        are dropped.  After a `None` is read it keeps delivering results
        until `cache` is empty or the thread state is TERMINATE.
        '''
        thread = threading.current_thread()

        while 1:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if thread._state:
                assert thread._state == TERMINATE
                debug('result handler found thread._state=TERMINATE')
                break

            if task is None:
                debug('result handler got sentinel')
                break

            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass

        while cache and thread._state != TERMINATE:
            try:
                task = get()
            except (IOError, EOFError):
                debug('result handler got EOFError/IOError -- exiting')
                return

            if task is None:
                debug('result handler ignoring extra sentinel')
                continue
            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass

        if hasattr(outqueue, '_reader'):
            debug('ensuring that outqueue is not full')
            # If we don't make room available in outqueue then
            # attempts to add the sentinel (None) to outqueue may
            # block.  There is guaranteed to be no more than 2 sentinels.
            try:
                for i in range(10):
                    if not outqueue._reader.poll():
                        break
                    get()
            except (IOError, EOFError):
                pass

        debug('result handler exiting: len(cache)=%s, thread._state=%s',
              len(cache), thread._state)

    @staticmethod
    def _get_tasks(func, it, size):
        '''
        Yield `(func, chunk)` pairs where each `chunk` is a tuple of up to
        `size` consecutive items of `it`; stops at the first empty chunk.
        '''
        it = iter(it)
        while 1:
            x = tuple(itertools.islice(it, size))
            if not x:
                return
            yield (func, x)

    def __reduce__(self):
        raise NotImplementedError(
              'pool objects cannot be passed between processes or pickled'
              )

    def close(self):
        '''
        Stop accepting work: move from RUN to CLOSE and queue the sentinel
        for the task handler.  Does nothing in any other state.
        '''
        debug('closing pool')
        if self._state == RUN:
            self._state = CLOSE
            self._taskqueue.put(None)

    def terminate(self):
        '''
        Set the state to TERMINATE and run the pool finalizer.
        '''
        debug('terminating pool')
        self._state = TERMINATE
        self._terminate()

    def join(self):
        '''
        Wait for both handler threads and every worker to finish.

        `close()` or `terminate()` must have been called first.
        '''
        debug('joining pool')
        assert self._state in (CLOSE, TERMINATE)
        self._task_handler.join()
        self._result_handler.join()
        for p in self._pool:
            p.join()

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        # task_handler may be blocked trying to put items on inqueue
        debug('removing tasks from inqueue until task handler finished')
        # NOTE(review): the read lock is acquired and not released in this
        # function -- presumably so nothing else reads tasks any more;
        # confirm before changing.
        inqueue._rlock.acquire()
        while task_handler.is_alive() and inqueue._reader.poll():
            inqueue._reader.recv()
            time.sleep(0)

    @classmethod
    def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool,
                        task_handler, result_handler, cache):
        '''
        Shut the pool down: flag both handler threads as TERMINATE, send
        them sentinels, terminate the workers when they support it, then
        join the handler threads and the workers.
        '''
        # this is guaranteed to only be called once
        debug('finalizing pool')

        task_handler._state = TERMINATE
        taskqueue.put(None)                 # sentinel

        debug('helping task handler/workers to finish')
        cls._help_stuff_finish(inqueue, task_handler, len(pool))

        assert result_handler.is_alive() or len(cache) == 0

        result_handler._state = TERMINATE
        outqueue.put(None)                  # sentinel

        if pool and hasattr(pool[0], 'terminate'):
            debug('terminating workers')
            for p in pool:
                p.terminate()

        debug('joining task handler')
        task_handler.join(1e100)

        debug('joining result handler')
        result_handler.join(1e100)

        if pool and hasattr(pool[0], 'terminate'):
            debug('joining pool workers')
            for p in pool:
                p.join()
392 # Class whose instances are returned by `Pool.apply_async()`
class ApplyResult(object):
    '''
    Handle for the outcome of a single asynchronous call.

    On creation the object registers itself in `cache` under a fresh job
    id; `_set()` stores the outcome, wakes waiting threads and removes the
    entry again.
    '''

    def __init__(self, cache, callback):
        '''
        cache    -- mapping of job id to result object; updated in place
        callback -- optional callable invoked with the value of a
                    successful call
        '''
        self._cond = threading.Condition(threading.Lock())
        self._job = next(job_counter)
        self._cache = cache
        self._ready = False
        self._callback = callback
        cache[self._job] = self

    def ready(self):
        '''Return True once the outcome has been stored.'''
        return self._ready

    def successful(self):
        '''Return whether the call succeeded; the result must be ready.'''
        assert self._ready
        return self._success

    def wait(self, timeout=None):
        '''Block until the outcome is stored or `timeout` has elapsed.'''
        with self._cond:
            if not self._ready:
                self._cond.wait(timeout)

    def get(self, timeout=None):
        '''
        Return the value of the call, waiting up to `timeout` for it.

        Raises TimeoutError if the outcome is still missing after the
        wait; if the call failed, raises the stored exception instead.
        '''
        self.wait(timeout)
        if not self._ready:
            raise TimeoutError
        if self._success:
            return self._value
        raise self._value

    def _set(self, i, obj):
        '''
        Store the outcome `obj`, a `(success, value)` pair.  `i` is unused.
        '''
        self._success, self._value = obj
        if self._callback and self._success:
            self._callback(self._value)
        with self._cond:
            self._ready = True
            # Several threads may be blocked in wait()/get() on the same
            # result; notify() would release only one of them.
            self._cond.notify_all()
        del self._cache[self._job]
442 # Class whose instances are returned by `Pool.map_async()`
class MapResult(ApplyResult):
    '''
    Result of a chunked map: collects per-chunk outcomes into one list.

    The value starts as a list of `length` placeholders; every successful
    chunk `i` overwrites its slice.  The first failing chunk makes the
    whole result fail with that chunk's exception.
    '''

    def __init__(self, cache, chunksize, length, callback):
        '''
        cache     -- mapping of job id to result object
        chunksize -- number of items per chunk; a value <= 0 means there
                     is nothing to wait for
        length    -- total number of items
        callback  -- optional callable invoked with the complete list
        '''
        ApplyResult.__init__(self, cache, callback)
        self._success = True
        self._value = [None] * length
        self._chunksize = chunksize
        if chunksize <= 0:
            self._number_left = 0
            self._ready = True
            # No chunk will ever arrive, so _set() is never called for this
            # job.  Remove the cache entry now; otherwise the cache never
            # becomes empty and the result handler keeps waiting for it.
            del cache[self._job]
        else:
            # Number of chunks, i.e. ceil(length / chunksize).
            self._number_left = length//chunksize + bool(length % chunksize)

    def _set(self, i, success_result):
        '''
        Record the outcome of chunk `i`, a `(success, result)` pair.
        '''
        success, result = success_result
        if success:
            start = i * self._chunksize
            self._value[start:start + self._chunksize] = result
            self._number_left -= 1
            if self._number_left:
                # Still waiting for other chunks.
                return
            if self._callback:
                self._callback(self._value)
        else:
            self._success = False
            self._value = result

        # Finished, either completely or by the first failure.
        del self._cache[self._job]
        with self._cond:
            self._ready = True
            # Wake every thread blocked in wait()/get(), not just one.
            self._cond.notify_all()
486 # Class whose instances are returned by `Pool.imap()`
class IMapIterator(object):
    '''
    Iterator over the results of a job, yielded in index order.

    Results that arrive early are parked in `_unsorted` until every lower
    index has been delivered.  The object registers itself in `cache` and
    removes itself once all results up to the announced length are in.
    '''

    def __init__(self, cache):
        self._cond = threading.Condition(threading.Lock())
        self._job = next(job_counter)
        self._cache = cache
        self._items = collections.deque()
        self._index = 0          # next index to hand over to `_items`
        self._length = None      # total number of results, once known
        self._unsorted = {}
        cache[self._job] = self

    def __iter__(self):
        return self

    def next(self, timeout=None):
        '''
        Return the next result, waiting up to `timeout` for it.

        Raises StopIteration when all results have been consumed,
        TimeoutError when nothing arrived in time, and re-raises the stored
        exception for a failed task.
        '''
        with self._cond:
            if not self._items:
                if self._index == self._length:
                    raise StopIteration
                self._cond.wait(timeout)
                if not self._items:
                    if self._index == self._length:
                        raise StopIteration
                    raise TimeoutError
            item = self._items.popleft()

        succeeded, payload = item
        if succeeded:
            return payload
        raise payload

    __next__ = next                    # XXX

    def _set(self, i, obj):
        '''Accept result `obj` for index `i`.'''
        with self._cond:
            if i != self._index:
                # Arrived ahead of its turn.
                self._unsorted[i] = obj
            else:
                self._items.append(obj)
                self._index += 1
                # Release any parked results that are now in sequence.
                while self._index in self._unsorted:
                    self._items.append(self._unsorted.pop(self._index))
                    self._index += 1
                self._cond.notify()

            if self._index == self._length:
                del self._cache[self._job]

    def _set_length(self, length):
        '''Announce the total number of results.'''
        with self._cond:
            self._length = length
            if self._index == length:
                self._cond.notify()
                del self._cache[self._job]
559 # Class whose instances are returned by `Pool.imap_unordered()`
class IMapUnorderedIterator(IMapIterator):
    '''
    Variant of `IMapIterator` that queues results in the order they arrive.
    '''

    def _set(self, i, obj):
        '''Accept result `obj`; the index `i` is ignored.'''
        with self._cond:
            self._items.append(obj)
            self._index += 1
            self._cond.notify()
            all_received = self._index == self._length
            if all_received:
                del self._cache[self._job]
class ThreadPool(Pool):
    '''
    `Pool` variant that takes its worker class from `.dummy` and uses
    `Queue.Queue` objects for the task and result queues.
    '''

    from .dummy import Process

    def __init__(self, processes=None, initializer=None, initargs=()):
        Pool.__init__(self, processes, initializer, initargs)

    def _setup_queues(self):
        '''Create the two queues and bind their put/get shortcuts.'''
        tasks, results = Queue.Queue(), Queue.Queue()
        self._inqueue, self._outqueue = tasks, results
        self._quick_put, self._quick_get = tasks.put, results.get

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        # put sentinels at head of inqueue to make workers finish
        with inqueue.not_empty:
            pending = inqueue.queue
            pending.clear()
            pending.extend(itertools.repeat(None, size))
            inqueue.not_empty.notify_all()