move and split design docs
[mygpo.git] / mygpo / counter.py
blob172c3d625495822f92d14c6988af2488659a7da1
1 # from http://code.activestate.com/recipes/576611-counter-class/
2 from operator import itemgetter
3 from heapq import nlargest
4 from itertools import repeat, ifilter
6 class Counter(dict):
7 '''Dict subclass for counting hashable objects. Sometimes called a bag
8 or multiset. Elements are stored as dictionary keys and their counts
9 are stored as dictionary values.
11 >>> Counter('zyzygy')
12 Counter({'y': 3, 'z': 2, 'g': 1})
14 '''
16 def __init__(self, iterable=None, **kwds):
17 '''Create a new, empty Counter object. And if given, count elements
18 from an input iterable. Or, initialize the count from another mapping
19 of elements to their counts.
21 >>> c = Counter() # a new, empty counter
22 >>> c = Counter('gallahad') # a new counter from an iterable
23 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
24 >>> c = Counter(a=4, b=2) # a new counter from keyword args
26 '''
27 self.update(iterable, **kwds)
29 def __missing__(self, key):
30 return 0
32 def most_common(self, n=None):
33 '''List the n most common elements and their counts from the most
34 common to the least. If n is None, then list all element counts.
36 >>> Counter('abracadabra').most_common(3)
37 [('a', 5), ('r', 2), ('b', 2)]
39 '''
40 if n is None:
41 return sorted(self.iteritems(), key=itemgetter(1), reverse=True)
42 return nlargest(n, self.iteritems(), key=itemgetter(1))
44 def elements(self):
45 '''Iterator over elements repeating each as many times as its count.
47 >>> c = Counter('ABCABC')
48 >>> sorted(c.elements())
49 ['A', 'A', 'B', 'B', 'C', 'C']
51 If an element's count has been set to zero or is a negative number,
52 elements() will ignore it.
54 '''
55 for elem, count in self.iteritems():
56 for _ in repeat(None, count):
57 yield elem
59 # Override dict methods where the meaning changes for Counter objects.
61 @classmethod
62 def fromkeys(cls, iterable, v=None):
63 raise NotImplementedError(
64 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
66 def update(self, iterable=None, **kwds):
67 '''Like dict.update() but add counts instead of replacing them.
69 Source can be an iterable, a dictionary, or another Counter instance.
71 >>> c = Counter('which')
72 >>> c.update('witch') # add elements from another iterable
73 >>> d = Counter('watch')
74 >>> c.update(d) # add elements from another counter
75 >>> c['h'] # four 'h' in which, witch, and watch
78 '''
79 if iterable is not None:
80 if hasattr(iterable, 'iteritems'):
81 if self:
82 self_get = self.get
83 for elem, count in iterable.iteritems():
84 self[elem] = self_get(elem, 0) + count
85 else:
86 dict.update(self, iterable) # fast path when counter is empty
87 else:
88 self_get = self.get
89 for elem in iterable:
90 self[elem] = self_get(elem, 0) + 1
91 if kwds:
92 self.update(kwds)
94 def copy(self):
95 'Like dict.copy() but returns a Counter instance instead of a dict.'
96 return Counter(self)
98 def __delitem__(self, elem):
99 'Like dict.__delitem__() but does not raise KeyError for missing values.'
100 if elem in self:
101 dict.__delitem__(self, elem)
103 def __repr__(self):
104 if not self:
105 return '%s()' % self.__class__.__name__
106 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
107 return '%s({%s})' % (self.__class__.__name__, items)
109 # Multiset-style mathematical operations discussed in:
110 # Knuth TAOCP Volume II section 4.6.3 exercise 19
111 # and at http://en.wikipedia.org/wiki/Multiset
113 # Outputs guaranteed to only include positive counts.
115 # To strip negative and zero counts, add-in an empty counter:
116 # c += Counter()
118 def __add__(self, other):
119 '''Add counts from two counters.
121 >>> Counter('abbb') + Counter('bcc')
122 Counter({'b': 4, 'c': 2, 'a': 1})
126 if not isinstance(other, Counter):
127 return NotImplemented
128 result = Counter()
129 for elem in set(self) | set(other):
130 newcount = self[elem] + other[elem]
131 if newcount > 0:
132 result[elem] = newcount
133 return result
135 def __sub__(self, other):
136 ''' Subtract count, but keep only results with positive counts.
138 >>> Counter('abbbc') - Counter('bccd')
139 Counter({'b': 2, 'a': 1})
142 if not isinstance(other, Counter):
143 return NotImplemented
144 result = Counter()
145 for elem in set(self) | set(other):
146 newcount = self[elem] - other[elem]
147 if newcount > 0:
148 result[elem] = newcount
149 return result
151 def __or__(self, other):
152 '''Union is the maximum of value in either of the input counters.
154 >>> Counter('abbb') | Counter('bcc')
155 Counter({'b': 3, 'c': 2, 'a': 1})
158 if not isinstance(other, Counter):
159 return NotImplemented
160 _max = max
161 result = Counter()
162 for elem in set(self) | set(other):
163 newcount = _max(self[elem], other[elem])
164 if newcount > 0:
165 result[elem] = newcount
166 return result
168 def __and__(self, other):
169 ''' Intersection is the minimum of corresponding counts.
171 >>> Counter('abbb') & Counter('bcc')
172 Counter({'b': 1})
175 if not isinstance(other, Counter):
176 return NotImplemented
177 _min = min
178 result = Counter()
179 if len(self) < len(other):
180 self, other = other, self
181 for elem in ifilter(self.__contains__, other):
182 newcount = _min(self[elem], other[elem])
183 if newcount > 0:
184 result[elem] = newcount
185 return result