add release date
[python/dscho.git] / Lib / collections.py
blob20cc6003bc5970d943a30421296b0886840e1157
1 __all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
2 # For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3 # They should however be considered an integral part of collections.py.
4 from _abcoll import *
5 import _abcoll
6 __all__ += _abcoll.__all__
8 from _collections import deque, defaultdict
9 from operator import itemgetter as _itemgetter, eq as _eq
10 from keyword import iskeyword as _iskeyword
11 import sys as _sys
12 import heapq as _heapq
13 from itertools import repeat as _repeat, chain as _chain, starmap as _starmap, \
14 ifilter as _ifilter, imap as _imap
16 ################################################################################
17 ### OrderedDict
18 ################################################################################
20 class OrderedDict(dict, MutableMapping):
21 'Dictionary that remembers insertion order'
22 # An inherited dict maps keys to values.
23 # The inherited dict provides __getitem__, __len__, __contains__, and get.
24 # The remaining methods are order-aware.
25 # Big-O running times for all methods are the same as for regular dictionaries.
27 # The internal self.__map dictionary maps keys to links in a doubly linked list.
28 # The circular doubly linked list starts and ends with a sentinel element.
29 # The sentinel element never gets deleted (this simplifies the algorithm).
30 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
32 def __init__(self, *args, **kwds):
33 '''Initialize an ordered dictionary. Signature is the same as for
34 regular dictionaries, but keyword arguments are not recommended
35 because their insertion order is arbitrary.
37 '''
38 if len(args) > 1:
39 raise TypeError('expected at most 1 arguments, got %d' % len(args))
40 try:
41 self.__root
42 except AttributeError:
43 self.__root = root = [None, None, None] # sentinel node
44 PREV = 0
45 NEXT = 1
46 root[PREV] = root[NEXT] = root
47 self.__map = {}
48 self.update(*args, **kwds)
50 def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
51 'od.__setitem__(i, y) <==> od[i]=y'
52 # Setting a new item creates a new link which goes at the end of the linked
53 # list, and the inherited dictionary is updated with the new key/value pair.
54 if key not in self:
55 root = self.__root
56 last = root[PREV]
57 last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
58 dict_setitem(self, key, value)
60 def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__):
61 'od.__delitem__(y) <==> del od[y]'
62 # Deleting an existing item uses self.__map to find the link which is
63 # then removed by updating the links in the predecessor and successor nodes.
64 dict_delitem(self, key)
65 link = self.__map.pop(key)
66 link_prev = link[PREV]
67 link_next = link[NEXT]
68 link_prev[NEXT] = link_next
69 link_next[PREV] = link_prev
71 def __iter__(self, NEXT=1, KEY=2):
72 'od.__iter__() <==> iter(od)'
73 # Traverse the linked list in order.
74 root = self.__root
75 curr = root[NEXT]
76 while curr is not root:
77 yield curr[KEY]
78 curr = curr[NEXT]
80 def __reversed__(self, PREV=0, KEY=2):
81 'od.__reversed__() <==> reversed(od)'
82 # Traverse the linked list in reverse order.
83 root = self.__root
84 curr = root[PREV]
85 while curr is not root:
86 yield curr[KEY]
87 curr = curr[PREV]
89 def __reduce__(self):
90 'Return state information for pickling'
91 items = [[k, self[k]] for k in self]
92 tmp = self.__map, self.__root
93 del self.__map, self.__root
94 inst_dict = vars(self).copy()
95 self.__map, self.__root = tmp
96 if inst_dict:
97 return (self.__class__, (items,), inst_dict)
98 return self.__class__, (items,)
100 def clear(self):
101 'od.clear() -> None. Remove all items from od.'
102 try:
103 for node in self.__map.itervalues():
104 del node[:]
105 self.__root[:] = [self.__root, self.__root, None]
106 self.__map.clear()
107 except AttributeError:
108 pass
109 dict.clear(self)
111 setdefault = MutableMapping.setdefault
112 update = MutableMapping.update
113 pop = MutableMapping.pop
114 keys = MutableMapping.keys
115 values = MutableMapping.values
116 items = MutableMapping.items
117 iterkeys = MutableMapping.iterkeys
118 itervalues = MutableMapping.itervalues
119 iteritems = MutableMapping.iteritems
120 __ne__ = MutableMapping.__ne__
122 def popitem(self, last=True):
123 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
124 Pairs are returned in LIFO order if last is true or FIFO order if false.
127 if not self:
128 raise KeyError('dictionary is empty')
129 key = next(reversed(self) if last else iter(self))
130 value = self.pop(key)
131 return key, value
133 def __repr__(self):
134 'od.__repr__() <==> repr(od)'
135 if not self:
136 return '%s()' % (self.__class__.__name__,)
137 return '%s(%r)' % (self.__class__.__name__, self.items())
139 def copy(self):
140 'od.copy() -> a shallow copy of od'
141 return self.__class__(self)
143 @classmethod
144 def fromkeys(cls, iterable, value=None):
145 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
146 and values equal to v (which defaults to None).
149 d = cls()
150 for key in iterable:
151 d[key] = value
152 return d
154 def __eq__(self, other):
155 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
156 while comparison to a regular mapping is order-insensitive.
159 if isinstance(other, OrderedDict):
160 return len(self)==len(other) and \
161 all(_imap(_eq, self.iteritems(), other.iteritems()))
162 return dict.__eq__(self, other)
164 def __del__(self):
165 self.clear() # eliminate cyclical references
168 ################################################################################
169 ### namedtuple
170 ################################################################################
172 def namedtuple(typename, field_names, verbose=False, rename=False):
173 """Returns a new subclass of tuple with named fields.
175 >>> Point = namedtuple('Point', 'x y')
176 >>> Point.__doc__ # docstring for the new class
177 'Point(x, y)'
178 >>> p = Point(11, y=22) # instantiate with positional args or keywords
179 >>> p[0] + p[1] # indexable like a plain tuple
181 >>> x, y = p # unpack like a regular tuple
182 >>> x, y
183 (11, 22)
184 >>> p.x + p.y # fields also accessable by name
186 >>> d = p._asdict() # convert to a dictionary
187 >>> d['x']
189 >>> Point(**d) # convert from a dictionary
190 Point(x=11, y=22)
191 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
192 Point(x=100, y=22)
196 # Parse and validate the field names. Validation serves two purposes,
197 # generating informative error messages and preventing template injection attacks.
198 if isinstance(field_names, basestring):
199 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
200 field_names = tuple(map(str, field_names))
201 if rename:
202 names = list(field_names)
203 seen = set()
204 for i, name in enumerate(names):
205 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
206 or not name or name[0].isdigit() or name.startswith('_')
207 or name in seen):
208 names[i] = '_%d' % i
209 seen.add(name)
210 field_names = tuple(names)
211 for name in (typename,) + field_names:
212 if not all(c.isalnum() or c=='_' for c in name):
213 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
214 if _iskeyword(name):
215 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
216 if name[0].isdigit():
217 raise ValueError('Type names and field names cannot start with a number: %r' % name)
218 seen_names = set()
219 for name in field_names:
220 if name.startswith('_') and not rename:
221 raise ValueError('Field names cannot start with an underscore: %r' % name)
222 if name in seen_names:
223 raise ValueError('Encountered duplicate field name: %r' % name)
224 seen_names.add(name)
226 # Create and fill-in the class template
227 numfields = len(field_names)
228 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
229 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
230 template = '''class %(typename)s(tuple):
231 '%(typename)s(%(argtxt)s)' \n
232 __slots__ = () \n
233 _fields = %(field_names)r \n
234 def __new__(_cls, %(argtxt)s):
235 'Create new instance of %(typename)s(%(argtxt)s)'
236 return _tuple.__new__(_cls, (%(argtxt)s)) \n
237 @classmethod
238 def _make(cls, iterable, new=tuple.__new__, len=len):
239 'Make a new %(typename)s object from a sequence or iterable'
240 result = new(cls, iterable)
241 if len(result) != %(numfields)d:
242 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
243 return result \n
244 def __repr__(self):
245 'Return a nicely formatted representation string'
246 return '%(typename)s(%(reprtxt)s)' %% self \n
247 def _asdict(self):
248 'Return a new OrderedDict which maps field names to their values'
249 return OrderedDict(zip(self._fields, self)) \n
250 def _replace(_self, **kwds):
251 'Return a new %(typename)s object replacing specified fields with new values'
252 result = _self._make(map(kwds.pop, %(field_names)r, _self))
253 if kwds:
254 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
255 return result \n
256 def __getnewargs__(self):
257 'Return self as a plain tuple. Used by copy and pickle.'
258 return tuple(self) \n\n''' % locals()
259 for i, name in enumerate(field_names):
260 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
261 if verbose:
262 print template
264 # Execute the template string in a temporary namespace and
265 # support tracing utilities by setting a value for frame.f_globals['__name__']
266 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
267 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
268 try:
269 exec template in namespace
270 except SyntaxError, e:
271 raise SyntaxError(e.message + ':\n' + template)
272 result = namespace[typename]
274 # For pickling to work, the __module__ variable needs to be set to the frame
275 # where the named tuple is created. Bypass this step in enviroments where
276 # sys._getframe is not defined (Jython for example) or sys._getframe is not
277 # defined for arguments greater than 0 (IronPython).
278 try:
279 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
280 except (AttributeError, ValueError):
281 pass
283 return result
286 ########################################################################
287 ### Counter
288 ########################################################################
290 class Counter(dict):
291 '''Dict subclass for counting hashable items. Sometimes called a bag
292 or multiset. Elements are stored as dictionary keys and their counts
293 are stored as dictionary values.
295 >>> c = Counter('abracadabra') # count elements from a string
297 >>> c.most_common(3) # three most common elements
298 [('a', 5), ('r', 2), ('b', 2)]
299 >>> sorted(c) # list all unique elements
300 ['a', 'b', 'c', 'd', 'r']
301 >>> ''.join(sorted(c.elements())) # list elements with repetitions
302 'aaaaabbcdrr'
303 >>> sum(c.values()) # total of all counts
306 >>> c['a'] # count of letter 'a'
308 >>> for elem in 'shazam': # update counts from an iterable
309 ... c[elem] += 1 # by adding 1 to each element's count
310 >>> c['a'] # now there are seven 'a'
312 >>> del c['r'] # remove all 'r'
313 >>> c['r'] # now there are zero 'r'
316 >>> d = Counter('simsalabim') # make another counter
317 >>> c.update(d) # add in the second counter
318 >>> c['a'] # now there are nine 'a'
321 >>> c.clear() # empty the counter
322 >>> c
323 Counter()
325 Note: If a count is set to zero or reduced to zero, it will remain
326 in the counter until the entry is deleted or the counter is cleared:
328 >>> c = Counter('aaabbc')
329 >>> c['b'] -= 2 # reduce the count of 'b' by two
330 >>> c.most_common() # 'b' is still in, but its count is zero
331 [('a', 3), ('c', 1), ('b', 0)]
334 # References:
335 # http://en.wikipedia.org/wiki/Multiset
336 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
337 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
338 # http://code.activestate.com/recipes/259174/
339 # Knuth, TAOCP Vol. II section 4.6.3
341 def __init__(self, iterable=None, **kwds):
342 '''Create a new, empty Counter object. And if given, count elements
343 from an input iterable. Or, initialize the count from another mapping
344 of elements to their counts.
346 >>> c = Counter() # a new, empty counter
347 >>> c = Counter('gallahad') # a new counter from an iterable
348 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
349 >>> c = Counter(a=4, b=2) # a new counter from keyword args
352 self.update(iterable, **kwds)
354 def __missing__(self, key):
355 'The count of elements not in the Counter is zero.'
356 # Needed so that self[missing_item] does not raise KeyError
357 return 0
359 def most_common(self, n=None):
360 '''List the n most common elements and their counts from the most
361 common to the least. If n is None, then list all element counts.
363 >>> Counter('abracadabra').most_common(3)
364 [('a', 5), ('r', 2), ('b', 2)]
367 # Emulate Bag.sortedByCount from Smalltalk
368 if n is None:
369 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
370 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
372 def elements(self):
373 '''Iterator over elements repeating each as many times as its count.
375 >>> c = Counter('ABCABC')
376 >>> sorted(c.elements())
377 ['A', 'A', 'B', 'B', 'C', 'C']
379 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
380 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
381 >>> product = 1
382 >>> for factor in prime_factors.elements(): # loop over factors
383 ... product *= factor # and multiply them
384 >>> product
385 1836
387 Note, if an element's count has been set to zero or is a negative
388 number, elements() will ignore it.
391 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
392 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
394 # Override dict methods where necessary
396 @classmethod
397 def fromkeys(cls, iterable, v=None):
398 # There is no equivalent method for counters because setting v=1
399 # means that no element can have a count greater than one.
400 raise NotImplementedError(
401 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
403 def update(self, iterable=None, **kwds):
404 '''Like dict.update() but add counts instead of replacing them.
406 Source can be an iterable, a dictionary, or another Counter instance.
408 >>> c = Counter('which')
409 >>> c.update('witch') # add elements from another iterable
410 >>> d = Counter('watch')
411 >>> c.update(d) # add elements from another counter
412 >>> c['h'] # four 'h' in which, witch, and watch
416 # The regular dict.update() operation makes no sense here because the
417 # replace behavior results in the some of original untouched counts
418 # being mixed-in with all of the other counts for a mismash that
419 # doesn't have a straight-forward interpretation in most counting
420 # contexts. Instead, we implement straight-addition. Both the inputs
421 # and outputs are allowed to contain zero and negative counts.
423 if iterable is not None:
424 if isinstance(iterable, Mapping):
425 if self:
426 self_get = self.get
427 for elem, count in iterable.iteritems():
428 self[elem] = self_get(elem, 0) + count
429 else:
430 dict.update(self, iterable) # fast path when counter is empty
431 else:
432 self_get = self.get
433 for elem in iterable:
434 self[elem] = self_get(elem, 0) + 1
435 if kwds:
436 self.update(kwds)
438 def subtract(self, iterable=None, **kwds):
439 '''Like dict.update() but subtracts counts instead of replacing them.
440 Counts can be reduced below zero. Both the inputs and outputs are
441 allowed to contain zero and negative counts.
443 Source can be an iterable, a dictionary, or another Counter instance.
445 >>> c = Counter('which')
446 >>> c.subtract('witch') # subtract elements from another iterable
447 >>> c.subtract(Counter('watch')) # subtract elements from another counter
448 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
450 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
454 if iterable is not None:
455 self_get = self.get
456 if isinstance(iterable, Mapping):
457 for elem, count in iterable.items():
458 self[elem] = self_get(elem, 0) - count
459 else:
460 for elem in iterable:
461 self[elem] = self_get(elem, 0) - 1
462 if kwds:
463 self.subtract(kwds)
465 def copy(self):
466 'Like dict.copy() but returns a Counter instance instead of a dict.'
467 return Counter(self)
469 def __delitem__(self, elem):
470 'Like dict.__delitem__() but does not raise KeyError for missing values.'
471 if elem in self:
472 dict.__delitem__(self, elem)
474 def __repr__(self):
475 if not self:
476 return '%s()' % self.__class__.__name__
477 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
478 return '%s({%s})' % (self.__class__.__name__, items)
480 # Multiset-style mathematical operations discussed in:
481 # Knuth TAOCP Volume II section 4.6.3 exercise 19
482 # and at http://en.wikipedia.org/wiki/Multiset
484 # Outputs guaranteed to only include positive counts.
486 # To strip negative and zero counts, add-in an empty counter:
487 # c += Counter()
489 def __add__(self, other):
490 '''Add counts from two counters.
492 >>> Counter('abbb') + Counter('bcc')
493 Counter({'b': 4, 'c': 2, 'a': 1})
496 if not isinstance(other, Counter):
497 return NotImplemented
498 result = Counter()
499 for elem in set(self) | set(other):
500 newcount = self[elem] + other[elem]
501 if newcount > 0:
502 result[elem] = newcount
503 return result
505 def __sub__(self, other):
506 ''' Subtract count, but keep only results with positive counts.
508 >>> Counter('abbbc') - Counter('bccd')
509 Counter({'b': 2, 'a': 1})
512 if not isinstance(other, Counter):
513 return NotImplemented
514 result = Counter()
515 for elem in set(self) | set(other):
516 newcount = self[elem] - other[elem]
517 if newcount > 0:
518 result[elem] = newcount
519 return result
521 def __or__(self, other):
522 '''Union is the maximum of value in either of the input counters.
524 >>> Counter('abbb') | Counter('bcc')
525 Counter({'b': 3, 'c': 2, 'a': 1})
528 if not isinstance(other, Counter):
529 return NotImplemented
530 result = Counter()
531 for elem in set(self) | set(other):
532 p, q = self[elem], other[elem]
533 newcount = q if p < q else p
534 if newcount > 0:
535 result[elem] = newcount
536 return result
538 def __and__(self, other):
539 ''' Intersection is the minimum of corresponding counts.
541 >>> Counter('abbb') & Counter('bcc')
542 Counter({'b': 1})
545 if not isinstance(other, Counter):
546 return NotImplemented
547 result = Counter()
548 if len(self) < len(other):
549 self, other = other, self
550 for elem in _ifilter(self.__contains__, other):
551 p, q = self[elem], other[elem]
552 newcount = p if p < q else q
553 if newcount > 0:
554 result[elem] = newcount
555 return result
558 if __name__ == '__main__':
559 # verify that instances can be pickled
560 from cPickle import loads, dumps
561 Point = namedtuple('Point', 'x, y', True)
562 p = Point(x=10, y=20)
563 assert p == loads(dumps(p))
565 # test and demonstrate ability to override methods
566 class Point(namedtuple('Point', 'x y')):
567 __slots__ = ()
568 @property
569 def hypot(self):
570 return (self.x ** 2 + self.y ** 2) ** 0.5
571 def __str__(self):
572 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
574 for p in Point(3, 4), Point(14, 5/7.):
575 print p
577 class Point(namedtuple('Point', 'x y')):
578 'Point class with optimized _make() and _replace() without error-checking'
579 __slots__ = ()
580 _make = classmethod(tuple.__new__)
581 def _replace(self, _map=map, **kwds):
582 return self._make(_map(kwds.get, ('x', 'y'), self))
584 print Point(11, 22)._replace(x=100)
586 Point3D = namedtuple('Point3D', Point._fields + ('z',))
587 print Point3D.__doc__
589 import doctest
590 TestResults = namedtuple('TestResults', 'failed attempted')
591 print TestResults(*doctest.testmod())