1 __all__
= ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
2 # For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3 # They should however be considered an integral part of collections.py.
6 __all__
+= _abcoll
.__all
__
8 from _collections
import deque
, defaultdict
9 from operator
import itemgetter
as _itemgetter
, eq
as _eq
10 from keyword
import iskeyword
as _iskeyword
12 import heapq
as _heapq
13 from itertools
import repeat
as _repeat
, chain
as _chain
, starmap
as _starmap
, \
14 ifilter
as _ifilter
, imap
as _imap
16 ################################################################################
18 ################################################################################
20 class OrderedDict(dict, MutableMapping
):
21 'Dictionary that remembers insertion order'
22 # An inherited dict maps keys to values.
23 # The inherited dict provides __getitem__, __len__, __contains__, and get.
24 # The remaining methods are order-aware.
25 # Big-O running times for all methods are the same as for regular dictionaries.
27 # The internal self.__map dictionary maps keys to links in a doubly linked list.
28 # The circular doubly linked list starts and ends with a sentinel element.
29 # The sentinel element never gets deleted (this simplifies the algorithm).
30 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
32 def __init__(self
, *args
, **kwds
):
33 '''Initialize an ordered dictionary. Signature is the same as for
34 regular dictionaries, but keyword arguments are not recommended
35 because their insertion order is arbitrary.
39 raise TypeError('expected at most 1 arguments, got %d' % len(args
))
42 except AttributeError:
43 self
.__root
= root
= [None, None, None] # sentinel node
46 root
[PREV
] = root
[NEXT
] = root
48 self
.update(*args
, **kwds
)
50 def __setitem__(self
, key
, value
, PREV
=0, NEXT
=1, dict_setitem
=dict.__setitem
__):
51 'od.__setitem__(i, y) <==> od[i]=y'
52 # Setting a new item creates a new link which goes at the end of the linked
53 # list, and the inherited dictionary is updated with the new key/value pair.
57 last
[NEXT
] = root
[PREV
] = self
.__map
[key
] = [last
, root
, key
]
58 dict_setitem(self
, key
, value
)
60 def __delitem__(self
, key
, PREV
=0, NEXT
=1, dict_delitem
=dict.__delitem
__):
61 'od.__delitem__(y) <==> del od[y]'
62 # Deleting an existing item uses self.__map to find the link which is
63 # then removed by updating the links in the predecessor and successor nodes.
64 dict_delitem(self
, key
)
65 link
= self
.__map
.pop(key
)
66 link_prev
= link
[PREV
]
67 link_next
= link
[NEXT
]
68 link_prev
[NEXT
] = link_next
69 link_next
[PREV
] = link_prev
71 def __iter__(self
, NEXT
=1, KEY
=2):
72 'od.__iter__() <==> iter(od)'
73 # Traverse the linked list in order.
76 while curr
is not root
:
80 def __reversed__(self
, PREV
=0, KEY
=2):
81 'od.__reversed__() <==> reversed(od)'
82 # Traverse the linked list in reverse order.
85 while curr
is not root
:
90 'Return state information for pickling'
91 items
= [[k
, self
[k
]] for k
in self
]
92 tmp
= self
.__map
, self
.__root
93 del self
.__map
, self
.__root
94 inst_dict
= vars(self
).copy()
95 self
.__map
, self
.__root
= tmp
97 return (self
.__class
__, (items
,), inst_dict
)
98 return self
.__class
__, (items
,)
101 'od.clear() -> None. Remove all items from od.'
103 for node
in self
.__map
.itervalues():
105 self
.__root
[:] = [self
.__root
, self
.__root
, None]
107 except AttributeError:
111 setdefault
= MutableMapping
.setdefault
112 update
= MutableMapping
.update
113 pop
= MutableMapping
.pop
114 keys
= MutableMapping
.keys
115 values
= MutableMapping
.values
116 items
= MutableMapping
.items
117 iterkeys
= MutableMapping
.iterkeys
118 itervalues
= MutableMapping
.itervalues
119 iteritems
= MutableMapping
.iteritems
120 __ne__
= MutableMapping
.__ne
__
122 def popitem(self
, last
=True):
123 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
124 Pairs are returned in LIFO order if last is true or FIFO order if false.
128 raise KeyError('dictionary is empty')
129 key
= next(reversed(self
) if last
else iter(self
))
130 value
= self
.pop(key
)
134 'od.__repr__() <==> repr(od)'
136 return '%s()' % (self
.__class
__.__name
__,)
137 return '%s(%r)' % (self
.__class
__.__name
__, self
.items())
140 'od.copy() -> a shallow copy of od'
141 return self
.__class
__(self
)
144 def fromkeys(cls
, iterable
, value
=None):
145 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
146 and values equal to v (which defaults to None).
154 def __eq__(self
, other
):
155 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
156 while comparison to a regular mapping is order-insensitive.
159 if isinstance(other
, OrderedDict
):
160 return len(self
)==len(other
) and \
161 all(_imap(_eq
, self
.iteritems(), other
.iteritems()))
162 return dict.__eq
__(self
, other
)
165 self
.clear() # eliminate cyclical references
168 ################################################################################
170 ################################################################################
172 def namedtuple(typename
, field_names
, verbose
=False, rename
=False):
173 """Returns a new subclass of tuple with named fields.
175 >>> Point = namedtuple('Point', 'x y')
176 >>> Point.__doc__ # docstring for the new class
178 >>> p = Point(11, y=22) # instantiate with positional args or keywords
179 >>> p[0] + p[1] # indexable like a plain tuple
181 >>> x, y = p # unpack like a regular tuple
184 >>> p.x + p.y # fields also accessable by name
186 >>> d = p._asdict() # convert to a dictionary
189 >>> Point(**d) # convert from a dictionary
191 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
196 # Parse and validate the field names. Validation serves two purposes,
197 # generating informative error messages and preventing template injection attacks.
198 if isinstance(field_names
, basestring
):
199 field_names
= field_names
.replace(',', ' ').split() # names separated by whitespace and/or commas
200 field_names
= tuple(map(str, field_names
))
202 names
= list(field_names
)
204 for i
, name
in enumerate(names
):
205 if (not all(c
.isalnum() or c
=='_' for c
in name
) or _iskeyword(name
)
206 or not name
or name
[0].isdigit() or name
.startswith('_')
210 field_names
= tuple(names
)
211 for name
in (typename
,) + field_names
:
212 if not all(c
.isalnum() or c
=='_' for c
in name
):
213 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name
)
215 raise ValueError('Type names and field names cannot be a keyword: %r' % name
)
216 if name
[0].isdigit():
217 raise ValueError('Type names and field names cannot start with a number: %r' % name
)
219 for name
in field_names
:
220 if name
.startswith('_') and not rename
:
221 raise ValueError('Field names cannot start with an underscore: %r' % name
)
222 if name
in seen_names
:
223 raise ValueError('Encountered duplicate field name: %r' % name
)
226 # Create and fill-in the class template
227 numfields
= len(field_names
)
228 argtxt
= repr(field_names
).replace("'", "")[1:-1] # tuple repr without parens or quotes
229 reprtxt
= ', '.join('%s=%%r' % name
for name
in field_names
)
230 template
= '''class %(typename)s(tuple):
231 '%(typename)s(%(argtxt)s)' \n
233 _fields = %(field_names)r \n
234 def __new__(_cls, %(argtxt)s):
235 'Create new instance of %(typename)s(%(argtxt)s)'
236 return _tuple.__new__(_cls, (%(argtxt)s)) \n
238 def _make(cls, iterable, new=tuple.__new__, len=len):
239 'Make a new %(typename)s object from a sequence or iterable'
240 result = new(cls, iterable)
241 if len(result) != %(numfields)d:
242 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
245 'Return a nicely formatted representation string'
246 return '%(typename)s(%(reprtxt)s)' %% self \n
248 'Return a new OrderedDict which maps field names to their values'
249 return OrderedDict(zip(self._fields, self)) \n
250 def _replace(_self, **kwds):
251 'Return a new %(typename)s object replacing specified fields with new values'
252 result = _self._make(map(kwds.pop, %(field_names)r, _self))
254 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
256 def __getnewargs__(self):
257 'Return self as a plain tuple. Used by copy and pickle.'
258 return tuple(self) \n\n''' % locals()
259 for i
, name
in enumerate(field_names
):
260 template
+= " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name
, i
, i
)
264 # Execute the template string in a temporary namespace and
265 # support tracing utilities by setting a value for frame.f_globals['__name__']
266 namespace
= dict(_itemgetter
=_itemgetter
, __name__
='namedtuple_%s' % typename
,
267 OrderedDict
=OrderedDict
, _property
=property, _tuple
=tuple)
269 exec template
in namespace
270 except SyntaxError, e
:
271 raise SyntaxError(e
.message
+ ':\n' + template
)
272 result
= namespace
[typename
]
274 # For pickling to work, the __module__ variable needs to be set to the frame
275 # where the named tuple is created. Bypass this step in enviroments where
276 # sys._getframe is not defined (Jython for example) or sys._getframe is not
277 # defined for arguments greater than 0 (IronPython).
279 result
.__module
__ = _sys
._getframe
(1).f_globals
.get('__name__', '__main__')
280 except (AttributeError, ValueError):
286 ########################################################################
288 ########################################################################
291 '''Dict subclass for counting hashable items. Sometimes called a bag
292 or multiset. Elements are stored as dictionary keys and their counts
293 are stored as dictionary values.
295 >>> c = Counter('abracadabra') # count elements from a string
297 >>> c.most_common(3) # three most common elements
298 [('a', 5), ('r', 2), ('b', 2)]
299 >>> sorted(c) # list all unique elements
300 ['a', 'b', 'c', 'd', 'r']
301 >>> ''.join(sorted(c.elements())) # list elements with repetitions
303 >>> sum(c.values()) # total of all counts
306 >>> c['a'] # count of letter 'a'
308 >>> for elem in 'shazam': # update counts from an iterable
309 ... c[elem] += 1 # by adding 1 to each element's count
310 >>> c['a'] # now there are seven 'a'
312 >>> del c['r'] # remove all 'r'
313 >>> c['r'] # now there are zero 'r'
316 >>> d = Counter('simsalabim') # make another counter
317 >>> c.update(d) # add in the second counter
318 >>> c['a'] # now there are nine 'a'
321 >>> c.clear() # empty the counter
325 Note: If a count is set to zero or reduced to zero, it will remain
326 in the counter until the entry is deleted or the counter is cleared:
328 >>> c = Counter('aaabbc')
329 >>> c['b'] -= 2 # reduce the count of 'b' by two
330 >>> c.most_common() # 'b' is still in, but its count is zero
331 [('a', 3), ('c', 1), ('b', 0)]
335 # http://en.wikipedia.org/wiki/Multiset
336 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
337 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
338 # http://code.activestate.com/recipes/259174/
339 # Knuth, TAOCP Vol. II section 4.6.3
341 def __init__(self
, iterable
=None, **kwds
):
342 '''Create a new, empty Counter object. And if given, count elements
343 from an input iterable. Or, initialize the count from another mapping
344 of elements to their counts.
346 >>> c = Counter() # a new, empty counter
347 >>> c = Counter('gallahad') # a new counter from an iterable
348 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
349 >>> c = Counter(a=4, b=2) # a new counter from keyword args
352 self
.update(iterable
, **kwds
)
354 def __missing__(self
, key
):
355 'The count of elements not in the Counter is zero.'
356 # Needed so that self[missing_item] does not raise KeyError
359 def most_common(self
, n
=None):
360 '''List the n most common elements and their counts from the most
361 common to the least. If n is None, then list all element counts.
363 >>> Counter('abracadabra').most_common(3)
364 [('a', 5), ('r', 2), ('b', 2)]
367 # Emulate Bag.sortedByCount from Smalltalk
369 return sorted(self
.iteritems(), key
=_itemgetter(1), reverse
=True)
370 return _heapq
.nlargest(n
, self
.iteritems(), key
=_itemgetter(1))
373 '''Iterator over elements repeating each as many times as its count.
375 >>> c = Counter('ABCABC')
376 >>> sorted(c.elements())
377 ['A', 'A', 'B', 'B', 'C', 'C']
379 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
380 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
382 >>> for factor in prime_factors.elements(): # loop over factors
383 ... product *= factor # and multiply them
387 Note, if an element's count has been set to zero or is a negative
388 number, elements() will ignore it.
391 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
392 return _chain
.from_iterable(_starmap(_repeat
, self
.iteritems()))
394 # Override dict methods where necessary
397 def fromkeys(cls
, iterable
, v
=None):
398 # There is no equivalent method for counters because setting v=1
399 # means that no element can have a count greater than one.
400 raise NotImplementedError(
401 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
403 def update(self
, iterable
=None, **kwds
):
404 '''Like dict.update() but add counts instead of replacing them.
406 Source can be an iterable, a dictionary, or another Counter instance.
408 >>> c = Counter('which')
409 >>> c.update('witch') # add elements from another iterable
410 >>> d = Counter('watch')
411 >>> c.update(d) # add elements from another counter
412 >>> c['h'] # four 'h' in which, witch, and watch
416 # The regular dict.update() operation makes no sense here because the
417 # replace behavior results in the some of original untouched counts
418 # being mixed-in with all of the other counts for a mismash that
419 # doesn't have a straight-forward interpretation in most counting
420 # contexts. Instead, we implement straight-addition. Both the inputs
421 # and outputs are allowed to contain zero and negative counts.
423 if iterable
is not None:
424 if isinstance(iterable
, Mapping
):
427 for elem
, count
in iterable
.iteritems():
428 self
[elem
] = self_get(elem
, 0) + count
430 dict.update(self
, iterable
) # fast path when counter is empty
433 for elem
in iterable
:
434 self
[elem
] = self_get(elem
, 0) + 1
438 def subtract(self
, iterable
=None, **kwds
):
439 '''Like dict.update() but subtracts counts instead of replacing them.
440 Counts can be reduced below zero. Both the inputs and outputs are
441 allowed to contain zero and negative counts.
443 Source can be an iterable, a dictionary, or another Counter instance.
445 >>> c = Counter('which')
446 >>> c.subtract('witch') # subtract elements from another iterable
447 >>> c.subtract(Counter('watch')) # subtract elements from another counter
448 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
450 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
454 if iterable
is not None:
456 if isinstance(iterable
, Mapping
):
457 for elem
, count
in iterable
.items():
458 self
[elem
] = self_get(elem
, 0) - count
460 for elem
in iterable
:
461 self
[elem
] = self_get(elem
, 0) - 1
466 'Like dict.copy() but returns a Counter instance instead of a dict.'
469 def __delitem__(self
, elem
):
470 'Like dict.__delitem__() but does not raise KeyError for missing values.'
472 dict.__delitem
__(self
, elem
)
476 return '%s()' % self
.__class
__.__name
__
477 items
= ', '.join(map('%r: %r'.__mod
__, self
.most_common()))
478 return '%s({%s})' % (self
.__class
__.__name
__, items
)
480 # Multiset-style mathematical operations discussed in:
481 # Knuth TAOCP Volume II section 4.6.3 exercise 19
482 # and at http://en.wikipedia.org/wiki/Multiset
484 # Outputs guaranteed to only include positive counts.
486 # To strip negative and zero counts, add-in an empty counter:
489 def __add__(self
, other
):
490 '''Add counts from two counters.
492 >>> Counter('abbb') + Counter('bcc')
493 Counter({'b': 4, 'c': 2, 'a': 1})
496 if not isinstance(other
, Counter
):
497 return NotImplemented
499 for elem
in set(self
) |
set(other
):
500 newcount
= self
[elem
] + other
[elem
]
502 result
[elem
] = newcount
505 def __sub__(self
, other
):
506 ''' Subtract count, but keep only results with positive counts.
508 >>> Counter('abbbc') - Counter('bccd')
509 Counter({'b': 2, 'a': 1})
512 if not isinstance(other
, Counter
):
513 return NotImplemented
515 for elem
in set(self
) |
set(other
):
516 newcount
= self
[elem
] - other
[elem
]
518 result
[elem
] = newcount
521 def __or__(self
, other
):
522 '''Union is the maximum of value in either of the input counters.
524 >>> Counter('abbb') | Counter('bcc')
525 Counter({'b': 3, 'c': 2, 'a': 1})
528 if not isinstance(other
, Counter
):
529 return NotImplemented
531 for elem
in set(self
) |
set(other
):
532 p
, q
= self
[elem
], other
[elem
]
533 newcount
= q
if p
< q
else p
535 result
[elem
] = newcount
538 def __and__(self
, other
):
539 ''' Intersection is the minimum of corresponding counts.
541 >>> Counter('abbb') & Counter('bcc')
545 if not isinstance(other
, Counter
):
546 return NotImplemented
548 if len(self
) < len(other
):
549 self
, other
= other
, self
550 for elem
in _ifilter(self
.__contains
__, other
):
551 p
, q
= self
[elem
], other
[elem
]
552 newcount
= p
if p
< q
else q
554 result
[elem
] = newcount
558 if __name__
== '__main__':
559 # verify that instances can be pickled
560 from cPickle
import loads
, dumps
561 Point
= namedtuple('Point', 'x, y', True)
562 p
= Point(x
=10, y
=20)
563 assert p
== loads(dumps(p
))
565 # test and demonstrate ability to override methods
566 class Point(namedtuple('Point', 'x y')):
570 return (self
.x
** 2 + self
.y
** 2) ** 0.5
572 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self
.x
, self
.y
, self
.hypot
)
574 for p
in Point(3, 4), Point(14, 5/7.):
577 class Point(namedtuple('Point', 'x y')):
578 'Point class with optimized _make() and _replace() without error-checking'
580 _make
= classmethod(tuple.__new
__)
581 def _replace(self
, _map
=map, **kwds
):
582 return self
._make
(_map(kwds
.get
, ('x', 'y'), self
))
584 print Point(11, 22)._replace
(x
=100)
586 Point3D
= namedtuple('Point3D', Point
._fields
+ ('z',))
587 print Point3D
.__doc
__
590 TestResults
= namedtuple('TestResults', 'failed attempted')
591 print TestResults(*doctest
.testmod())