4 # ************************************
5 # Extending Iterators for use as Queue
6 # ************************************
10 # :Copyright: 2005, 2007 Guenter Milde.
11 # Released under the terms of the GNU General Public License
13 # :Changelog: 2005-06-29 Initial version
14 # 2007-01-07 literate version, more examples
15 # :Abstract: There are many variants of "rich iterators" with varying
16 # efficiency, conventions, naming, and behaviour. This survey will
17 # compare them and provide a case for the inclusion of a "rich
18 # iterator wrapper" to the Python Standard Library
24 """iterqueue: mutable iterators
26 Classes for "extended iterators" with methods to let iterators be used as
30 `append left` -- push back values
31 `peek` -- get a value without "using it up"
32 `__nonzero__` -- test for empty iterator
38 # The `itertools` module provides a set of building blocks for the work with
39 # iterators (but misses a class for "mutable" iterators). ::
43 # The `collections` module with the efficient double-sided queue was
44 # introduced in Python 2.4. The following construct provides a minimal
45 # compatibility definition if it is not available::
48 from collections
import deque
51 def appendleft(self
, value
):
55 # Iterables and Iterators
56 # =======================
58 # Iterables and iterators are defined by the iterator protocol as laid out in
59 # the section on `Iterator Types`_ in the Python Library Reference`:
62 # One method needs to be defined for container objects to provide iteration
65 # :__iter__(): Return an iterator object. [...] If a container supports
66 # different types of iteration, additional methods can be
67 # provided to specifically request iterators for those
68 # iteration types. [...]
72 def is_iterable(object):
73 """Check if the argument is iterable"""
74 return hasattr(object, "__iter__") and is_iterator(iter(object))
77 # The *iterator objects* themselves are required to support the following
78 # two methods, which together form the *iterator protocol*:
80 # :__iter__(): Return the iterator object itself. This is required to allow
81 # both containers and iterators to be used with the `for` and
84 # :next(): Return the next item from the container. If there are no further
85 # items, raise the `StopIteration` exception.
87 # [...] once an iterator's next() method raises `StopIteration`,
88 # it will continue to do so on subsequent calls. Implementations
89 # that do not obey this property are deemed broken.
91 # Check if an object is an iterator::
93 def is_iterator(object):
94 """check if the argument is an iterator"""
95 if not hasattr(object, "__iter__"):
97 return (object.__iter
__() is object) and hasattr(object, "next")
102 # >>> import iterqueue
103 # >>> iterqueue.is_iterator(23)
105 # >>> iterqueue.is_iterator(iter(range(3)))
108 # The iterator protocol was primarily designed to be the *minimum* necessary
109 # to work in `for statements`, translating (behind the scene)::
111 # | for item in iterable:
114 # into the equivalent of::
116 # | iterator = iter(iterable)
119 # | item = iterator.next()
120 # | except StopIteration: break
124 # To add iterator behaviour to your classes, define an `__iter__` method which
125 # returns an object with a `next` method. If the class defines `next`, then
126 # `__iter__` can just return `self`. (`tutorial chapter on iterators`_)
128 # Python's *generators* provide a convenient way to implement the iterator
129 # protocol. Generator objects are returned by *generator functions* (functions
130 # with the ``yield`` keyword, new in 2.3) and *generator expressions* (new in
133 # .. _`Iterator Types`:
134 # http://docs.python.org/library/stdtypes.html#iterator-types
135 # .. _`tutorial chapter on iterators`:
136 # http://docs.python.org/tutorial/classes.html#iterators
138 # Limitations of iterator objects
139 # ===============================
141 # Most built-in Python iterator objects (including generator objects) are
142 # non-mutable (except the call to the `next` method). They "produce the data
143 # just in time", which is fast and memory efficient.
147 # 1. In some occasions, it is important to
149 # * find out whether an iterator is empty or
150 # * to "peek" at a data value
152 # without advancing the iterator.
154 # 2. In a state machine, an iterator holding the input values can be passed
155 # around to the state handling functions. If a state handler realises that
156 # a value should be processed by another state handler, it needs to
159 # 3. One might want modify the object iterated over in a `for` statement.
161 # Generally, the object in a `for` statement can not be changed inside the
164 # >>> from collections import deque
165 # >>> it = deque(range(3))
169 # ... it.appendleft("eins")
171 # Traceback (most recent call last):
172 # File "doctest.py", line 1248, in __run
173 # compileflags, 1) in test.globs
174 # File "<doctest iterqueue.py.txt[8]>", line 1, in ?
176 # RuntimeError: deque mutated during iteration
179 # ====================
181 # There are many ways to live with the limits of iterators. Most often it
182 # helps to get a true understanding of their nature and try to count for it in
183 # the code. However, the "never ending" discussion and varying recipes for
184 # enhanced iterators show the ongoing public demand. This is why I argue for
185 # the inclusion of a 'rich iterator' wrapper class into the standard library
186 # based on the _`standardisation argument` in the itertools_ module.
188 # Standardisation helps avoid the readability and reliability problems which
189 # arise when many different individuals create their own slightly varying
190 # implementations, each with their own quirks and naming conventions.
193 # .. _itertools: http://docs.python.org/library/itertools.html
195 # Recode to work with iterators as they are
196 # -----------------------------------------
198 # The most straightforward way is to translate code like
200 # >>> def print_first(l):
202 # ... print "list empty"
206 # >>> print_first([1, 2])
208 # >>> print_first([])
211 # into something in the line of
213 # >>> def print_next(it):
215 # ... value = it.next()
216 # ... except StopIteration:
217 # ... print "list empty"
221 # >>> print_next(iter([1, 2]))
223 # >>> print_next(iter([]))
226 # In a `for` statement, the `else` keyword can be utilised to call an
227 # expression (or a block) if the end of the iterator is reached:
229 # >>> def find_five(iterable):
230 # ... for i in iterable:
232 # ... print "5 found"
235 # ... print "5 not found"
237 # If the loop is aborted, the else clause is skipped
239 # >>> find_five(range(7))
242 # Otherwise it prints its message:
244 # >>> find_five(range(3))
247 # However, there might be cases where this is not applicable and a test for
248 # the emptiness or a peek at the first value without advancing the iterator
249 # would enable much cleaner code.
251 # Use a container object
252 # ----------------------
254 # One could wrap e.g. a generator into a `list` or `collections.deque` to add
255 # random access as well as extensibility.
257 # >>> que = deque(xrange(3))
258 # >>> que.appendleft("foo")
260 # deque(['foo', 0, 1, 2])
262 # However, it will put all iterator values into memory which becomes a problem
263 # for large iterators (and is non-feasible for unlimited iterators).
265 # Also, iterating in a `for` statement will loose the rich behaviour. Instead
266 # a construct with a `while` statement is needed, e.g:
268 # >>> que = deque(range(3))
270 # ... v = que.popleft()
273 # ... que.appendleft("eins")
277 # Use a rich iterator
278 # -------------------
280 # If the argument of a `for` statement is an iterator (whose `__iter__`
281 # method returns `self`), it is available unchanged inside the loop. A *rich
282 # iterator* provides additional methods besides the ones required for the
285 # State Reporting Iterator
286 # ~~~~~~~~~~~~~~~~~~~~~~~~
288 # An iterator that returns an indicator "full or empty" (values waiting or
289 # not) when converted to Boolean will be called *state reporting iterator*::
291 def is_state_reporting(object):
292 return hasattr(object, "__nonzero__") or hasattr(object, "__len__")
297 # An iterator that provides a `peek` method will be called a
298 # *peekable iterator*::
300 def is_peekable(object):
301 return hasattr(object, "peek")
307 # An iterator that provides for push-back will be called *push-iterator*::
309 def is_pushable(object):
310 return hasattr(object, "appendleft") or hasattr(object, "push")
312 # Push iterators can be easily extended with `peek` and test of emptiness
313 # (see `PushIterator`_).
317 # An iterator that also provides methods for appending and extending will be
318 # called *iterator_queue*.
320 # Methods that need access from the "right" side or knowledge of the length of
321 # the iterator are not included in the iterator_queue specification as they
322 # clash with the "just in time" acquisition of the values that give iterators
323 # time and memory advantage over sequences. ::
325 def is_iterator_queue(object):
326 return (is_state_reporting(object)
327 and hasattr(object, "append")
328 and hasattr(object, "appendleft")
329 and hasattr(object, "extend")
330 and hasattr(object, "extendleft")
331 and hasattr(object, "clear")
332 and hasattr(object, "rotate")
335 # Rich iterator examples
336 # ======================
338 # The following examples are the result of a net survey and own ideas.
339 # The will be compared and profiled in this paper.
341 # All of them are iterator-wrappers::
343 def is_iterator_wrapper(obj
):
344 """Try if obj can wrap an iterator"""
350 return is_iterator(it
)
357 # Tom Andersson suggested in the Python list an `xiterable protocol`__ for
358 # extended iterables and a wrapper to convert a "traditional" iterator to an
361 # __ http://mail.python.org/pipermail/python-list/2006-January/360162.html
366 if (hasattr(iterable
, "__xiter__")):
367 return iterable
.__xiter
__()
369 return xiterwrapper(iter(iterable
))
371 class xiterwrapper(object):
372 def __init__(self
, it
):
376 return hasattr(self
, "_next")
382 except AttributeError:
387 except AttributeError:
391 self
._next
= self
.it
.next()
392 except StopIteration:
393 if (hasattr(self
, "_next")):
403 # >>> import iterqueue
404 # >>> it = iterqueue.xiter(xrange(3))
405 # >>> iterqueue.is_iterator(it)
407 # >>> iterqueue.is_peekable(it)
409 # >>> iterqueue.is_pushable(it)
412 # We add the __nonzero__ method for a non-destructive test of waiting values
413 # to add the state reporting feature::
415 __nonzero__
= hasNext
417 # >>> iterqueue.is_state_reporting(it)
420 # Adding a `push` method is not possible without major changes to the code.
422 # IteratorWrapper BFL
423 # -------------------
425 # In a `post on python-3000`__ Guido van Rossum argued against inclusion of an
426 # "emptiness" test in the iterator protocol, as "that's just not something
427 # that generators can be expected to support" and hence would exclude
428 # generators from the definition of an iterator.
430 # __ http://mail.python.org/pipermail/python-3000/2006-March/000058.html
432 # ... you can always write a helper class that takes an iterator and
433 # returns an object that represents the same iterator, but sometimes
434 # buffers one element. But the buffering violates the coroutine-ish
435 # properties of generators, so it should not be the only (or even the
436 # default) way to access generators.
439 # Here's a sample wrapper (untested)
443 class IteratorWrapperBFL(object):
444 def __init__(self
, it
):
447 self
.buffered
= False
448 self
.exhausted
= False
454 self
.buffered
= False
458 raise StopIteration()
460 return self
.it
.next()
461 except StopIteration:
462 self
.exhausted
= True
464 def __nonzero__(self
):
470 self
.buffer = self
.it
.next()
471 except StopIteration:
472 self
.exhausted
= True
477 # This example provides an "emptiness" test but no peek or push-back:
479 # >>> it = iterqueue.IteratorWrapperBFL(xrange(3))
480 # >>> iterqueue.is_state_reporting(it)
483 # Peeking could be easily added, though::
486 self
.buffer = self
.next()
490 # >>> iterqueue.is_peekable(it)
496 # Daniel Dittmar wrote on Di 22 Jul. 2003 on comp.lang.python
498 # It shouldn't be too difficult to write an iterator wrapper class that does
499 # exactly what you want (not tested)::
501 class IteratorWrapperDD
:
502 def __init__ (self
, iterArg
):
503 iterArg
= iter (iterArg
)
505 self
.firstElement
= iterArg
.next ()
507 self
.next
= self
.returnFirstElement
508 self
.baseIter
= iterArg
509 except StopIteration:
511 self
.next
= self
.throwStopIteration
513 def returnFirstElement(self
):
514 self
.next
= self
.baseIter
.next
515 return self
.firstElement
517 def throwStopIteration(self
):
524 # In the slides to the `Effective Python Programming`_ OSCON 2005 tutorial by
525 # Anthony Baxter, I found a genially simple example for an iterator with a
528 # .. _`Effective Python Programming`:
529 # http://www.interlink.com.au/anthony/tech/talks/OSCON2005/effective_r27.pdf
534 def __init__(self
, iterable
):
535 """Store iterator as data argument and set up cache"""
536 self
.it
= iter(iterable
)
543 """Return next value (from cache or iterator)"""
545 return self
.cache
.pop()
546 return self
.it
.next()
548 def push(self
, value
):
549 """Push back one value (will become the `next` value)"""
550 self
.cache
.append(value
)
552 # Once `push` is defined, it is easy to add `peek` and `__nonzero__`.
554 # The question arises, what should be returned by `peek()` if the iterator is
555 # empty. The easiest option is to raise `StopIteration`, but this might come
556 # unhandy in some cases. My proposal is to add an optional `default` argument,
557 # which is returned in case the iterator is empty. (As there is no sensible
558 # default value for the `default` argument, it cannot be implemented as
559 # keyword arg, instead an argument list is used)::
561 def peek(self
, *defaults
):
562 """Return next value but do not advance the iterator"""
565 except StopIteration:
572 def __nonzero__(self
):
573 """Test whether the iterator is empty"""
576 except StopIteration:
580 # An alias makes the class more compatible with `collections.deque` ::
584 # Optimisation of `peek` and `__nonzero__` is is left out in favour of
590 # Create an instance from an iterable object:
592 # >>> it = iterqueue.PushIterator(range(4))
604 # the value is still there:
611 # >>> [i for i in it]
614 # It should be empty now:
619 # So a peek will return the default:
621 # >>> print it.peek(None)
627 # The wrapping of an iterator in a class leads to performance loss, as every
628 # call to `next()` is a relatively costly function call.
630 # Remapping of self.next leads to a more more efficient implementation
631 # of the PushIterator for the case that `peek` or `push` is called far
632 # less frequently than `next` ('normal' iterating with occasional peek or
635 class PushIt(PushIterator
):
636 def __init__(self
, iterable
):
637 self
.it
= iter(iterable
)
639 self
.next
= self
.it
.next
642 """Return next element. Try cache first."""
644 return self
.cache
.pop()
645 self
.next
= self
.it
.next
648 def push(self
, value
):
649 """Push back one value to the iterator"""
650 self
.cache
.append(value
)
651 self
.next
= self
._next
654 """Return next value but do not advance the iterator"""
656 return self
.cache
[-1]
657 value
= self
.it
.next()
665 # The `IterQueue` class adds iterator behaviour to a double-ended queue::
667 class IterQueue(deque
):
668 """Iterator object that is also a queue"""
673 return self
.popleft()
678 """Return next value but do not advance the iterator"""
680 self
.appendleft(value
)
687 # Creating an instance wraps an iterable in an iterator queue
689 # >>> it = iterqueue.IterQueue(xrange(3))
691 # which is an iterator according to the iterator protocol with "queue"
694 # >>> iterqueue.is_iterator_queue(it)
697 # We can test whether there is data in the iterator or get the length of it:
704 # It is possible to modify this iterator in the middle of a `for` statement:
709 # ... it.appendleft("eins")
713 # As iteration is done on the object itself and not on a copy, it is exhausted
719 # (the iterator advertises itself as `deque`, as we did not override the
722 # We can make up for this ::
725 return "<IterQueue instance>"
727 # but there might be other problems left...
733 # Converting an iterable to a `collections.deque` object creates a list of all
734 # values in the memory, loosing the memory saving advantages of generator
735 # objects with "just in time" production of the data.
737 # Printing (and probably other uses as well) "use up" the iterator
739 # >>> it = iterqueue.IterQueue(range(3))
749 # The following class implements an iterator queue that is
751 # * memory efficient, as generators are kept as generators
753 # * mostly compatible to `collections.deque` (offering all methods of a
754 # `deque` for appends)
758 # * random access to the values, nor
760 # * pop from the right end,
762 # as this would require to convert the iterator to a sequence loosing the
763 # memory-saving advantage.
765 # Iterating over instances is less fast, as the next() method is redefined
766 # (a function call is needed for every step). Implementing in C would help
771 # itertools.queue() was rejected because it didn't fit naturally into
772 # applications -- you had to completely twist your logic around just to
773 # accommodate it. Besides, it is already simple to iterate over a list
774 # while appending items to it as needed.
776 # -- Raymond Hettinger 03-13-05 http://www.codecomments.com/message423138.html
778 # However, both, the speed increase as well as the `standardisation argument`_
779 # given for the `itertools` hold also in this case
780 # Maybe IQueue should become a collections candidate?
785 """Iterator with "on-line extensibility"
787 An iterator with methods to append or extend it from left or right
788 (even while the iterator is in use).
790 Can be conceived as a mixture of `itertools.chain` and
793 As `next` is redefined, there is a performance loss when iterating
794 over large iterators.
797 def __init__(self
, *iterables
):
798 """Convert `iterables` to a queue object"""
799 self
.iterators
= deque(iterables
)
807 return self
.iterators
[0].next()
808 except AttributeError: # convert iterable to iterator
809 self
.iterators
[0] = iter(self
.iterators
[0])
810 except StopIteration: # switch to next iterator
811 del(self
.iterators
[0])
812 except IndexError: # all iterators exhausted
815 def append(self
, value
):
816 """append `value` to self
818 The value is wrapped in an iterable and
819 appended to the queue of iterables
821 self
.iterators
.append(iter((value
,)))
823 def appendleft(self
, value
):
824 """Prepend one (scalar) value to the iterator.
826 The value is wrapped in an iterable and
827 inserted at the first position in the list of iterables
829 self
.iterators
.appendleft(iter((value
,)))
832 """Remove all elements from the iterator.
834 self
.iterators
.clear()
836 def extend(self
, iterable
):
837 """append `iterable` to self"""
838 self
.iterators
.append(iter(iterable
))
840 def extendleft(self
, iterable
):
841 """prepend `iterable` to self"""
842 self
.iterators
.appendleft(iter(iterable
))
845 """Return the next value without advancing the iterator
847 Yield next value but push back a copy of the result.
848 This way you may "peak" at an iterator without loss.
850 Raises `StopIteration` if the iterator is empty.
853 self
.iterators
.appendleft(iter((value
,)))
856 def rotate(self
, n
=1):
857 """append the next `n` values to the end of the iterator
859 Similar to `container.deque.rotate`, but
860 * negative `n` leads to error
861 * a list of the `n` rotated values is returned
863 result
= list(itertools
.islice(self
, n
))
864 self
.iterators
.append(result
)
869 """Return a string representation"""
870 return "IQueue(%s)" % repr(self
.iterators
)
872 def __nonzero__(self
):
873 """Test for a non-zero length of the iterator"""
874 if len(self
.iterators
) > 1:
878 except StopIteration:
885 # The `XIter` class is an optimised version of the `IQueue` for the
886 # case when appending of a value is a done less frequently than calling `next`.
887 # It does so by aliasing next to the underlying iterators `next` method in
888 # case there is only one iterator in the `iterables` chain.
893 """'Mutable iterator' class"""
894 def __init__(self
, *iterables
):
895 self
.iterators
= deque(iter(i
) for i
in iterables
)
896 if len(self
.iterators
) is 1:
897 self
.next
= self
.iterators
[0].next
899 self
.next
= self
._next
# iterate over argument
901 def __iter__(self
): return self
# "I am an iterator!"
904 """get next in turn if there are more than one iterators"""
906 return self
.iterators
[0].next()
907 except StopIteration:
908 del(self
.iterators
[0]) # switch to next iterator
909 assert len(self
.iterators
) >= 1
910 if len(self
.iterators
) is 1:
911 self
.next
= self
.iterators
[0].next
914 def append(self
, element
):
915 """append `element` to self"""
916 self
.iterators
.append(iter((element
,)))
917 self
.next
= self
._next
# iterate over cache
919 def appendleft(self
, element
):
920 """prepend `element` to self"""
921 self
.iterators
.appendleft(iter((element
,)))
922 self
.next
= self
._next
# iterate over cache
924 def extend(self
, iterable
):
925 """append `iterable` to self"""
926 self
.iterators
.append(iter(iterable
))
927 self
.next
= self
._next
# iterate over cache
929 def extendleft(self
, iterable
):
930 """prepend `iterable` to self"""
931 self
.iterators
.appendleft(iter(iterable
))
932 self
.next
= self
._next
# iterate over cache
936 """Return the next value without advancing the iterator
938 Yield next value but push back a copy of the result.
939 This way you may "peak" at an iterator without loss.
941 Raises `StopIteration` if the iterator is empty.
944 self
.appendleft(value
)
947 def rotate(self
, n
=1):
948 """append the next `n` values to the end of the iterator
950 Similar to `container.deque.rotate`, but
951 * negative `n` leads to error
952 * a list of the `n` rotated values is returned
954 result
= list(itertools
.islice(self
, n
))
955 self
.iterators
.append(result
)
960 return "XIter(%s)" % repr(self
.iterators
)
962 def __nonzero__(self
):
963 """Test for a non-zero length of the iterator"""
964 if len(self
.iterators
) > 1:
968 except StopIteration:
973 # Some optimisation could be done adapting a `round-robin example`__ posted by
974 # R. Hettinger on 2004-04-30 15:58 in comp.lang.python
976 # __ http://sourceforge.net/tracker/index.php?func=detail&aid=756253&group_id=5470&atid=305470
981 # For the record, here a simple and efficient roundrobin task
982 # server based on collections.deque:
984 # def roundrobin(*iterables):
985 # pending = deque(iter(i).next for i in iterables)
986 # gettask, scheduletask = pending.popleft, pending.append
991 # except StopIteration:
995 # for value in roundrobin('abc', 'd', 'efgh'):
999 # Do a doctest if the module is run in nosetests::