1 __all__
= ['Counter', 'deque', 'defaultdict', 'namedtuple']
2 # For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3 # They should however be considered an integral part of collections.py.
6 __all__
+= _abcoll
.__all
__
8 from _collections
import deque
, defaultdict
9 from operator
import itemgetter
as _itemgetter
10 from keyword
import iskeyword
as _iskeyword
12 import heapq
as _heapq
13 from itertools
import repeat
as _repeat
, chain
as _chain
, starmap
as _starmap
, ifilter
as _ifilter
15 ################################################################################
17 ################################################################################
19 def namedtuple(typename
, field_names
, verbose
=False):
20 """Returns a new subclass of tuple with named fields.
22 >>> Point = namedtuple('Point', 'x y')
23 >>> Point.__doc__ # docstring for the new class
25 >>> p = Point(11, y=22) # instantiate with positional args or keywords
26 >>> p[0] + p[1] # indexable like a plain tuple
28 >>> x, y = p # unpack like a regular tuple
31 >>> p.x + p.y # fields also accessable by name
33 >>> d = p._asdict() # convert to a dictionary
36 >>> Point(**d) # convert from a dictionary
38 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
43 # Parse and validate the field names. Validation serves two purposes,
44 # generating informative error messages and preventing template injection attacks.
45 if isinstance(field_names
, basestring
):
46 field_names
= field_names
.replace(',', ' ').split() # names separated by whitespace and/or commas
47 field_names
= tuple(map(str, field_names
))
48 for name
in (typename
,) + field_names
:
49 if not all(c
.isalnum() or c
=='_' for c
in name
):
50 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name
)
52 raise ValueError('Type names and field names cannot be a keyword: %r' % name
)
54 raise ValueError('Type names and field names cannot start with a number: %r' % name
)
56 for name
in field_names
:
57 if name
.startswith('_'):
58 raise ValueError('Field names cannot start with an underscore: %r' % name
)
59 if name
in seen_names
:
60 raise ValueError('Encountered duplicate field name: %r' % name
)
63 # Create and fill-in the class template
64 numfields
= len(field_names
)
65 argtxt
= repr(field_names
).replace("'", "")[1:-1] # tuple repr without parens or quotes
66 reprtxt
= ', '.join('%s=%%r' % name
for name
in field_names
)
67 dicttxt
= ', '.join('%r: t[%d]' % (name
, pos
) for pos
, name
in enumerate(field_names
))
68 template
= '''class %(typename)s(tuple):
69 '%(typename)s(%(argtxt)s)' \n
71 _fields = %(field_names)r \n
72 def __new__(cls, %(argtxt)s):
73 return tuple.__new__(cls, (%(argtxt)s)) \n
75 def _make(cls, iterable, new=tuple.__new__, len=len):
76 'Make a new %(typename)s object from a sequence or iterable'
77 result = new(cls, iterable)
78 if len(result) != %(numfields)d:
79 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
82 return '%(typename)s(%(reprtxt)s)' %% self \n
84 'Return a new dict which maps field names to their values'
85 return {%(dicttxt)s} \n
86 def _replace(self, **kwds):
87 'Return a new %(typename)s object replacing specified fields with new values'
88 result = self._make(map(kwds.pop, %(field_names)r, self))
90 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
92 def __getnewargs__(self):
93 return tuple(self) \n\n''' % locals()
94 for i
, name
in enumerate(field_names
):
95 template
+= ' %s = property(itemgetter(%d))\n' % (name
, i
)
99 # Execute the template string in a temporary namespace and
100 # support tracing utilities by setting a value for frame.f_globals['__name__']
101 namespace
= dict(itemgetter
=_itemgetter
, __name__
='namedtuple_%s' % typename
)
103 exec template
in namespace
104 except SyntaxError, e
:
105 raise SyntaxError(e
.message
+ ':\n' + template
)
106 result
= namespace
[typename
]
108 # For pickling to work, the __module__ variable needs to be set to the frame
109 # where the named tuple is created. Bypass this step in enviroments where
110 # sys._getframe is not defined (Jython for example).
111 if hasattr(_sys
, '_getframe'):
112 result
.__module
__ = _sys
._getframe
(1).f_globals
.get('__name__', '__main__')
117 ########################################################################
119 ########################################################################
122 '''Dict subclass for counting hashable items. Sometimes called a bag
123 or multiset. Elements are stored as dictionary keys and their counts
124 are stored as dictionary values.
126 >>> c = Counter('abracadabra') # count elements from a string
128 >>> c.most_common(3) # three most common elements
129 [('a', 5), ('r', 2), ('b', 2)]
130 >>> sorted(c) # list all unique elements
131 ['a', 'b', 'c', 'd', 'r']
132 >>> ''.join(sorted(c.elements())) # list elements with repetitions
134 >>> sum(c.values()) # total of all counts
137 >>> c['a'] # count of letter 'a'
139 >>> for elem in 'shazam': # update counts from an iterable
140 ... c[elem] += 1 # by adding 1 to each element's count
141 >>> c['a'] # now there are seven 'a'
143 >>> del c['r'] # remove all 'r'
144 >>> c['r'] # now there are zero 'r'
147 >>> d = Counter('simsalabim') # make another counter
148 >>> c.update(d) # add in the second counter
149 >>> c['a'] # now there are nine 'a'
152 >>> c.clear() # empty the counter
156 Note: If a count is set to zero or reduced to zero, it will remain
157 in the counter until the entry is deleted or the counter is cleared:
159 >>> c = Counter('aaabbc')
160 >>> c['b'] -= 2 # reduce the count of 'b' by two
161 >>> c.most_common() # 'b' is still in, but its count is zero
162 [('a', 3), ('c', 1), ('b', 0)]
166 # http://en.wikipedia.org/wiki/Multiset
167 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
168 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
169 # http://code.activestate.com/recipes/259174/
170 # Knuth, TAOCP Vol. II section 4.6.3
172 def __init__(self
, iterable
=None, **kwds
):
173 '''Create a new, empty Counter object. And if given, count elements
174 from an input iterable. Or, initialize the count from another mapping
175 of elements to their counts.
177 >>> c = Counter() # a new, empty counter
178 >>> c = Counter('gallahad') # a new counter from an iterable
179 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
180 >>> c = Counter(a=4, b=2) # a new counter from keyword args
183 self
.update(iterable
, **kwds
)
185 def __missing__(self
, key
):
186 'The count of elements not in the Counter is zero.'
187 # Needed so that self[missing_item] does not raise KeyError
190 def most_common(self
, n
=None):
191 '''List the n most common elements and their counts from the most
192 common to the least. If n is None, then list all element counts.
194 >>> Counter('abracadabra').most_common(3)
195 [('a', 5), ('r', 2), ('b', 2)]
198 # Emulate Bag.sortedByCount from Smalltalk
200 return sorted(self
.iteritems(), key
=_itemgetter(1), reverse
=True)
201 return _heapq
.nlargest(n
, self
.iteritems(), key
=_itemgetter(1))
204 '''Iterator over elements repeating each as many times as its count.
206 >>> c = Counter('ABCABC')
207 >>> sorted(c.elements())
208 ['A', 'A', 'B', 'B', 'C', 'C']
210 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
211 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
213 >>> for factor in prime_factors.elements(): # loop over factors
214 ... product *= factor # and multiply them
218 Note, if an element's count has been set to zero or is a negative
219 number, elements() will ignore it.
222 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
223 return _chain
.from_iterable(_starmap(_repeat
, self
.iteritems()))
225 # Override dict methods where necessary
228 def fromkeys(cls
, iterable
, v
=None):
229 # There is no equivalent method for counters because setting v=1
230 # means that no element can have a count greater than one.
231 raise NotImplementedError(
232 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
234 def update(self
, iterable
=None, **kwds
):
235 '''Like dict.update() but add counts instead of replacing them.
237 Source can be an iterable, a dictionary, or another Counter instance.
239 >>> c = Counter('which')
240 >>> c.update('witch') # add elements from another iterable
241 >>> d = Counter('watch')
242 >>> c.update(d) # add elements from another counter
243 >>> c['h'] # four 'h' in which, witch, and watch
247 # The regular dict.update() operation makes no sense here because the
248 # replace behavior results in the some of original untouched counts
249 # being mixed-in with all of the other counts for a mismash that
250 # doesn't have a straight-forward interpretation in most counting
251 # contexts. Instead, we implement straight-addition. Both the inputs
252 # and outputs are allowed to contain zero and negative counts.
254 if iterable
is not None:
255 if isinstance(iterable
, Mapping
):
257 for elem
, count
in iterable
.iteritems():
260 dict.update(self
, iterable
) # fast path when counter is empty
262 for elem
in iterable
:
268 'Like dict.copy() but returns a Counter instance instead of a dict.'
271 def __delitem__(self
, elem
):
272 'Like dict.__delitem__() but does not raise KeyError for missing values.'
274 dict.__delitem
__(self
, elem
)
278 return '%s()' % self
.__class
__.__name
__
279 items
= ', '.join(map('%r: %r'.__mod
__, self
.most_common()))
280 return '%s({%s})' % (self
.__class
__.__name
__, items
)
282 # Multiset-style mathematical operations discussed in:
283 # Knuth TAOCP Volume II section 4.6.3 exercise 19
284 # and at http://en.wikipedia.org/wiki/Multiset
286 # Outputs guaranteed to only include positive counts.
288 # To strip negative and zero counts, add-in an empty counter:
291 def __add__(self
, other
):
292 '''Add counts from two counters.
294 >>> Counter('abbb') + Counter('bcc')
295 Counter({'b': 4, 'c': 2, 'a': 1})
298 if not isinstance(other
, Counter
):
299 return NotImplemented
301 for elem
in set(self
) |
set(other
):
302 newcount
= self
[elem
] + other
[elem
]
304 result
[elem
] = newcount
307 def __sub__(self
, other
):
308 ''' Subtract count, but keep only results with positive counts.
310 >>> Counter('abbbc') - Counter('bccd')
311 Counter({'b': 2, 'a': 1})
314 if not isinstance(other
, Counter
):
315 return NotImplemented
317 for elem
in set(self
) |
set(other
):
318 newcount
= self
[elem
] - other
[elem
]
320 result
[elem
] = newcount
323 def __or__(self
, other
):
324 '''Union is the maximum of value in either of the input counters.
326 >>> Counter('abbb') | Counter('bcc')
327 Counter({'b': 3, 'c': 2, 'a': 1})
330 if not isinstance(other
, Counter
):
331 return NotImplemented
334 for elem
in set(self
) |
set(other
):
335 newcount
= _max(self
[elem
], other
[elem
])
337 result
[elem
] = newcount
340 def __and__(self
, other
):
341 ''' Intersection is the minimum of corresponding counts.
343 >>> Counter('abbb') & Counter('bcc')
347 if not isinstance(other
, Counter
):
348 return NotImplemented
351 if len(self
) < len(other
):
352 self
, other
= other
, self
353 for elem
in _ifilter(self
.__contains
__, other
):
354 newcount
= _min(self
[elem
], other
[elem
])
356 result
[elem
] = newcount
360 if __name__
== '__main__':
361 # verify that instances can be pickled
362 from cPickle
import loads
, dumps
363 Point
= namedtuple('Point', 'x, y', True)
364 p
= Point(x
=10, y
=20)
365 assert p
== loads(dumps(p
))
367 # test and demonstrate ability to override methods
368 class Point(namedtuple('Point', 'x y')):
372 return (self
.x
** 2 + self
.y
** 2) ** 0.5
374 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self
.x
, self
.y
, self
.hypot
)
376 for p
in Point(3, 4), Point(14, 5/7.):
379 class Point(namedtuple('Point', 'x y')):
380 'Point class with optimized _make() and _replace() without error-checking'
382 _make
= classmethod(tuple.__new
__)
383 def _replace(self
, _map
=map, **kwds
):
384 return self
._make
(_map(kwds
.get
, ('x', 'y'), self
))
386 print Point(11, 22)._replace
(x
=100)
388 Point3D
= namedtuple('Point3D', Point
._fields
+ ('z',))
389 print Point3D
.__doc
__
392 TestResults
= namedtuple('TestResults', 'failed attempted')
393 print TestResults(*doctest
.testmod())