1 __all__
= ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
2 # For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3 # They should however be considered an integral part of collections.py.
6 __all__
+= _abcoll
.__all
__
8 from _collections
import deque
, defaultdict
9 from operator
import itemgetter
as _itemgetter
, eq
as _eq
10 from keyword
import iskeyword
as _iskeyword
12 import heapq
as _heapq
13 from weakref
import proxy
as _proxy
14 from itertools
import repeat
as _repeat
, chain
as _chain
, starmap
as _starmap
, \
15 ifilter
as _ifilter
, imap
as _imap
, izip
as _izip
17 ################################################################################
19 ################################################################################
22 __slots__
= 'prev', 'next', 'key', '__weakref__'
24 class OrderedDict(dict, MutableMapping
):
25 'Dictionary that remembers insertion order'
26 # An inherited dict maps keys to values.
27 # The inherited dict provides __getitem__, __len__, __contains__, and get.
28 # The remaining methods are order-aware.
29 # Big-O running times for all methods are the same as for regular dictionaries.
31 # The internal self.__map dictionary maps keys to links in a doubly linked list.
32 # The circular doubly linked list starts and ends with a sentinel element.
33 # The sentinel element never gets deleted (this simplifies the algorithm).
34 # The prev/next links are weakref proxies (to prevent circular references).
35 # Individual links are kept alive by the hard reference in self.__map.
36 # Those hard references disappear when a key is deleted from an OrderedDict.
38 def __init__(self
, *args
, **kwds
):
39 '''Initialize an ordered dictionary. Signature is the same as for
40 regular dictionaries, but keyword arguments are not recommended
41 because their insertion order is arbitrary.
45 raise TypeError('expected at most 1 arguments, got %d' % len(args
))
48 except AttributeError:
49 self
.__root
= root
= _Link() # sentinel node for the doubly linked list
50 root
.prev
= root
.next
= root
52 self
.update(*args
, **kwds
)
55 'od.clear() -> None. Remove all items from od.'
57 root
.prev
= root
.next
= root
61 def __setitem__(self
, key
, value
):
62 'od.__setitem__(i, y) <==> od[i]=y'
63 # Setting a new item creates a new link which goes at the end of the linked
64 # list, and the inherited dictionary is updated with the new key/value pair.
66 self
.__map
[key
] = link
= _Link()
69 link
.prev
, link
.next
, link
.key
= last
, root
, key
70 last
.next
= root
.prev
= _proxy(link
)
71 dict.__setitem
__(self
, key
, value
)
73 def __delitem__(self
, key
):
74 'od.__delitem__(y) <==> del od[y]'
75 # Deleting an existing item uses self.__map to find the link which is
76 # then removed by updating the links in the predecessor and successor nodes.
77 dict.__delitem
__(self
, key
)
78 link
= self
.__map
.pop(key
)
79 link
.prev
.next
= link
.next
80 link
.next
.prev
= link
.prev
83 'od.__iter__() <==> iter(od)'
84 # Traverse the linked list in order.
87 while curr
is not root
:
91 def __reversed__(self
):
92 'od.__reversed__() <==> reversed(od)'
93 # Traverse the linked list in reverse order.
96 while curr
is not root
:
100 def __reduce__(self
):
101 'Return state information for pickling'
102 items
= [[k
, self
[k
]] for k
in self
]
103 tmp
= self
.__map
, self
.__root
104 del self
.__map
, self
.__root
105 inst_dict
= vars(self
).copy()
106 self
.__map
, self
.__root
= tmp
108 return (self
.__class
__, (items
,), inst_dict
)
109 return self
.__class
__, (items
,)
111 setdefault
= MutableMapping
.setdefault
112 update
= MutableMapping
.update
113 pop
= MutableMapping
.pop
114 keys
= MutableMapping
.keys
115 values
= MutableMapping
.values
116 items
= MutableMapping
.items
117 iterkeys
= MutableMapping
.iterkeys
118 itervalues
= MutableMapping
.itervalues
119 iteritems
= MutableMapping
.iteritems
120 __ne__
= MutableMapping
.__ne
__
122 def popitem(self
, last
=True):
123 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
124 Pairs are returned in LIFO order if last is true or FIFO order if false.
128 raise KeyError('dictionary is empty')
129 key
= next(reversed(self
) if last
else iter(self
))
130 value
= self
.pop(key
)
134 'od.__repr__() <==> repr(od)'
136 return '%s()' % (self
.__class
__.__name
__,)
137 return '%s(%r)' % (self
.__class
__.__name
__, self
.items())
140 'od.copy() -> a shallow copy of od'
141 return self
.__class
__(self
)
144 def fromkeys(cls
, iterable
, value
=None):
145 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
146 and values equal to v (which defaults to None).
154 def __eq__(self
, other
):
155 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
156 while comparison to a regular mapping is order-insensitive.
159 if isinstance(other
, OrderedDict
):
160 return len(self
)==len(other
) and \
161 all(_imap(_eq
, self
.iteritems(), other
.iteritems()))
162 return dict.__eq
__(self
, other
)
166 ################################################################################
168 ################################################################################
170 def namedtuple(typename
, field_names
, verbose
=False, rename
=False):
171 """Returns a new subclass of tuple with named fields.
173 >>> Point = namedtuple('Point', 'x y')
174 >>> Point.__doc__ # docstring for the new class
176 >>> p = Point(11, y=22) # instantiate with positional args or keywords
177 >>> p[0] + p[1] # indexable like a plain tuple
179 >>> x, y = p # unpack like a regular tuple
182 >>> p.x + p.y # fields also accessable by name
184 >>> d = p._asdict() # convert to a dictionary
187 >>> Point(**d) # convert from a dictionary
189 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
194 # Parse and validate the field names. Validation serves two purposes,
195 # generating informative error messages and preventing template injection attacks.
196 if isinstance(field_names
, basestring
):
197 field_names
= field_names
.replace(',', ' ').split() # names separated by whitespace and/or commas
198 field_names
= tuple(map(str, field_names
))
200 names
= list(field_names
)
202 for i
, name
in enumerate(names
):
203 if (not all(c
.isalnum() or c
=='_' for c
in name
) or _iskeyword(name
)
204 or not name
or name
[0].isdigit() or name
.startswith('_')
208 field_names
= tuple(names
)
209 for name
in (typename
,) + field_names
:
210 if not all(c
.isalnum() or c
=='_' for c
in name
):
211 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name
)
213 raise ValueError('Type names and field names cannot be a keyword: %r' % name
)
214 if name
[0].isdigit():
215 raise ValueError('Type names and field names cannot start with a number: %r' % name
)
217 for name
in field_names
:
218 if name
.startswith('_') and not rename
:
219 raise ValueError('Field names cannot start with an underscore: %r' % name
)
220 if name
in seen_names
:
221 raise ValueError('Encountered duplicate field name: %r' % name
)
224 # Create and fill-in the class template
225 numfields
= len(field_names
)
226 argtxt
= repr(field_names
).replace("'", "")[1:-1] # tuple repr without parens or quotes
227 reprtxt
= ', '.join('%s=%%r' % name
for name
in field_names
)
228 template
= '''class %(typename)s(tuple):
229 '%(typename)s(%(argtxt)s)' \n
231 _fields = %(field_names)r \n
232 def __new__(cls, %(argtxt)s):
233 return tuple.__new__(cls, (%(argtxt)s)) \n
235 def _make(cls, iterable, new=tuple.__new__, len=len):
236 'Make a new %(typename)s object from a sequence or iterable'
237 result = new(cls, iterable)
238 if len(result) != %(numfields)d:
239 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
242 return '%(typename)s(%(reprtxt)s)' %% self \n
244 'Return a new OrderedDict which maps field names to their values'
245 return OrderedDict(zip(self._fields, self)) \n
246 def _replace(self, **kwds):
247 'Return a new %(typename)s object replacing specified fields with new values'
248 result = self._make(map(kwds.pop, %(field_names)r, self))
250 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
252 def __getnewargs__(self):
253 return tuple(self) \n\n''' % locals()
254 for i
, name
in enumerate(field_names
):
255 template
+= ' %s = property(itemgetter(%d))\n' % (name
, i
)
259 # Execute the template string in a temporary namespace and
260 # support tracing utilities by setting a value for frame.f_globals['__name__']
261 namespace
= dict(itemgetter
=_itemgetter
, __name__
='namedtuple_%s' % typename
,
262 OrderedDict
=OrderedDict
)
264 exec template
in namespace
265 except SyntaxError, e
:
266 raise SyntaxError(e
.message
+ ':\n' + template
)
267 result
= namespace
[typename
]
269 # For pickling to work, the __module__ variable needs to be set to the frame
270 # where the named tuple is created. Bypass this step in enviroments where
271 # sys._getframe is not defined (Jython for example).
272 if hasattr(_sys
, '_getframe'):
273 result
.__module
__ = _sys
._getframe
(1).f_globals
.get('__name__', '__main__')
278 ########################################################################
280 ########################################################################
283 '''Dict subclass for counting hashable items. Sometimes called a bag
284 or multiset. Elements are stored as dictionary keys and their counts
285 are stored as dictionary values.
287 >>> c = Counter('abracadabra') # count elements from a string
289 >>> c.most_common(3) # three most common elements
290 [('a', 5), ('r', 2), ('b', 2)]
291 >>> sorted(c) # list all unique elements
292 ['a', 'b', 'c', 'd', 'r']
293 >>> ''.join(sorted(c.elements())) # list elements with repetitions
295 >>> sum(c.values()) # total of all counts
298 >>> c['a'] # count of letter 'a'
300 >>> for elem in 'shazam': # update counts from an iterable
301 ... c[elem] += 1 # by adding 1 to each element's count
302 >>> c['a'] # now there are seven 'a'
304 >>> del c['r'] # remove all 'r'
305 >>> c['r'] # now there are zero 'r'
308 >>> d = Counter('simsalabim') # make another counter
309 >>> c.update(d) # add in the second counter
310 >>> c['a'] # now there are nine 'a'
313 >>> c.clear() # empty the counter
317 Note: If a count is set to zero or reduced to zero, it will remain
318 in the counter until the entry is deleted or the counter is cleared:
320 >>> c = Counter('aaabbc')
321 >>> c['b'] -= 2 # reduce the count of 'b' by two
322 >>> c.most_common() # 'b' is still in, but its count is zero
323 [('a', 3), ('c', 1), ('b', 0)]
327 # http://en.wikipedia.org/wiki/Multiset
328 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
329 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
330 # http://code.activestate.com/recipes/259174/
331 # Knuth, TAOCP Vol. II section 4.6.3
333 def __init__(self
, iterable
=None, **kwds
):
334 '''Create a new, empty Counter object. And if given, count elements
335 from an input iterable. Or, initialize the count from another mapping
336 of elements to their counts.
338 >>> c = Counter() # a new, empty counter
339 >>> c = Counter('gallahad') # a new counter from an iterable
340 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
341 >>> c = Counter(a=4, b=2) # a new counter from keyword args
344 self
.update(iterable
, **kwds
)
346 def __missing__(self
, key
):
347 'The count of elements not in the Counter is zero.'
348 # Needed so that self[missing_item] does not raise KeyError
351 def most_common(self
, n
=None):
352 '''List the n most common elements and their counts from the most
353 common to the least. If n is None, then list all element counts.
355 >>> Counter('abracadabra').most_common(3)
356 [('a', 5), ('r', 2), ('b', 2)]
359 # Emulate Bag.sortedByCount from Smalltalk
361 return sorted(self
.iteritems(), key
=_itemgetter(1), reverse
=True)
362 return _heapq
.nlargest(n
, self
.iteritems(), key
=_itemgetter(1))
365 '''Iterator over elements repeating each as many times as its count.
367 >>> c = Counter('ABCABC')
368 >>> sorted(c.elements())
369 ['A', 'A', 'B', 'B', 'C', 'C']
371 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
372 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
374 >>> for factor in prime_factors.elements(): # loop over factors
375 ... product *= factor # and multiply them
379 Note, if an element's count has been set to zero or is a negative
380 number, elements() will ignore it.
383 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
384 return _chain
.from_iterable(_starmap(_repeat
, self
.iteritems()))
386 # Override dict methods where necessary
389 def fromkeys(cls
, iterable
, v
=None):
390 # There is no equivalent method for counters because setting v=1
391 # means that no element can have a count greater than one.
392 raise NotImplementedError(
393 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
395 def update(self
, iterable
=None, **kwds
):
396 '''Like dict.update() but add counts instead of replacing them.
398 Source can be an iterable, a dictionary, or another Counter instance.
400 >>> c = Counter('which')
401 >>> c.update('witch') # add elements from another iterable
402 >>> d = Counter('watch')
403 >>> c.update(d) # add elements from another counter
404 >>> c['h'] # four 'h' in which, witch, and watch
408 # The regular dict.update() operation makes no sense here because the
409 # replace behavior results in the some of original untouched counts
410 # being mixed-in with all of the other counts for a mismash that
411 # doesn't have a straight-forward interpretation in most counting
412 # contexts. Instead, we implement straight-addition. Both the inputs
413 # and outputs are allowed to contain zero and negative counts.
415 if iterable
is not None:
416 if isinstance(iterable
, Mapping
):
418 for elem
, count
in iterable
.iteritems():
421 dict.update(self
, iterable
) # fast path when counter is empty
423 for elem
in iterable
:
429 'Like dict.copy() but returns a Counter instance instead of a dict.'
432 def __delitem__(self
, elem
):
433 'Like dict.__delitem__() but does not raise KeyError for missing values.'
435 dict.__delitem
__(self
, elem
)
439 return '%s()' % self
.__class
__.__name
__
440 items
= ', '.join(map('%r: %r'.__mod
__, self
.most_common()))
441 return '%s({%s})' % (self
.__class
__.__name
__, items
)
443 # Multiset-style mathematical operations discussed in:
444 # Knuth TAOCP Volume II section 4.6.3 exercise 19
445 # and at http://en.wikipedia.org/wiki/Multiset
447 # Outputs guaranteed to only include positive counts.
449 # To strip negative and zero counts, add-in an empty counter:
452 def __add__(self
, other
):
453 '''Add counts from two counters.
455 >>> Counter('abbb') + Counter('bcc')
456 Counter({'b': 4, 'c': 2, 'a': 1})
459 if not isinstance(other
, Counter
):
460 return NotImplemented
462 for elem
in set(self
) |
set(other
):
463 newcount
= self
[elem
] + other
[elem
]
465 result
[elem
] = newcount
468 def __sub__(self
, other
):
469 ''' Subtract count, but keep only results with positive counts.
471 >>> Counter('abbbc') - Counter('bccd')
472 Counter({'b': 2, 'a': 1})
475 if not isinstance(other
, Counter
):
476 return NotImplemented
478 for elem
in set(self
) |
set(other
):
479 newcount
= self
[elem
] - other
[elem
]
481 result
[elem
] = newcount
484 def __or__(self
, other
):
485 '''Union is the maximum of value in either of the input counters.
487 >>> Counter('abbb') | Counter('bcc')
488 Counter({'b': 3, 'c': 2, 'a': 1})
491 if not isinstance(other
, Counter
):
492 return NotImplemented
494 for elem
in set(self
) |
set(other
):
495 p
, q
= self
[elem
], other
[elem
]
496 newcount
= q
if p
< q
else p
498 result
[elem
] = newcount
501 def __and__(self
, other
):
502 ''' Intersection is the minimum of corresponding counts.
504 >>> Counter('abbb') & Counter('bcc')
508 if not isinstance(other
, Counter
):
509 return NotImplemented
511 if len(self
) < len(other
):
512 self
, other
= other
, self
513 for elem
in _ifilter(self
.__contains
__, other
):
514 p
, q
= self
[elem
], other
[elem
]
515 newcount
= p
if p
< q
else q
517 result
[elem
] = newcount
521 if __name__
== '__main__':
522 # verify that instances can be pickled
523 from cPickle
import loads
, dumps
524 Point
= namedtuple('Point', 'x, y', True)
525 p
= Point(x
=10, y
=20)
526 assert p
== loads(dumps(p
))
528 # test and demonstrate ability to override methods
529 class Point(namedtuple('Point', 'x y')):
533 return (self
.x
** 2 + self
.y
** 2) ** 0.5
535 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self
.x
, self
.y
, self
.hypot
)
537 for p
in Point(3, 4), Point(14, 5/7.):
540 class Point(namedtuple('Point', 'x y')):
541 'Point class with optimized _make() and _replace() without error-checking'
543 _make
= classmethod(tuple.__new
__)
544 def _replace(self
, _map
=map, **kwds
):
545 return self
._make
(_map(kwds
.get
, ('x', 'y'), self
))
547 print Point(11, 22)._replace
(x
=100)
549 Point3D
= namedtuple('Point3D', Point
._fields
+ ('z',))
550 print Point3D
.__doc
__
553 TestResults
= namedtuple('TestResults', 'failed attempted')
554 print TestResults(*doctest
.testmod())