gostyle: the basic library, initial commit.
[gostyle.git] / utils / utils.py
blob14dd32af8351b98beff0d596b671ba0747dc7118
1 import re
2 import subprocess
3 import os
4 import copy
5 from os.path import abspath, exists
6 import shutil
7 import functools
8 import inspect
9 import itertools
10 import numpy
12 import misc
13 from colors import BlackWhite
14 import types
# Executable names of the external viewers that viewer_open can launch.
VIEWER_LIST = ['qgo', 'kombilo']


def viewer_open(sgf_filename, executable=VIEWER_LIST[1]):
    """Start an external viewer on ``sgf_filename`` without waiting for it.

    ``executable`` is the program to run; it defaults to the second entry
    of VIEWER_LIST.  The child process is left running on its own and
    nothing is returned.
    """
    command = [executable, sgf_filename]
    subprocess.Popen(command)
def bark():
    """Run the ``bark`` command through the shell.

    The command's exit status is not returned to the caller.
    """
    command = 'bark'
    subprocess.call(command, shell=True)
def check_output(*args, **kwargs):
    """Run a command and return what it wrote to standard output.

    Thin compatibility wrapper: when ``subprocess.check_output`` exists it
    is called directly with the given arguments.  Otherwise the same
    behaviour is emulated with ``subprocess.Popen``.

    Arguments are the same as for ``subprocess.Popen``, except that
    ``stdout`` must not be given in the fallback path (it is replaced by a
    pipe).

    Returns the captured standard output of the command.

    Raises:
        ValueError: in the fallback path, if ``stdout`` is passed.
        subprocess.CalledProcessError: if the command exits with a nonzero
            status; the captured output is available as ``.output``.
    """
    if hasattr(subprocess, 'check_output'):
        return subprocess.check_output(*args, **kwargs)

    # Fallback for interpreters whose subprocess module lacks check_output.
    if 'stdout' in kwargs:
        raise ValueError('stdout argument not allowed, it will be overridden.')
    process = subprocess.Popen(stdout=subprocess.PIPE, *args, **kwargs)
    output, unused_err = process.communicate()
    retcode = process.poll()
    if retcode:
        cmd = kwargs.get("args")
        if cmd is None:
            cmd = args[0]
        # Interpreters that lack check_output also lack the ``output``
        # keyword on CalledProcessError, so attach it as an attribute
        # instead of passing it to the constructor.
        error = subprocess.CalledProcessError(retcode, cmd)
        error.output = output
        raise error
    return output
def get_year(datestr, match_century=True):
    """Extract a year from a free-form date string (e.g. an SGF DT field).

    The first run of four digits on the first line of @datestr is taken as
    the year.  If there is none and @match_century is true, a century
    written like "18th century" or "17th c." is looked for instead and the
    middle year of that century is returned (18th century -> 1750).  When
    several centuries are mentioned, the last one wins because the leading
    ``.*`` of the pattern is greedy.

    Returns the year as an int, or None if nothing matched.
    """
    # The non-greedy ``.*?`` makes the leftmost four-digit run win,
    # e.g. get_year("1982 1999") == 1982.
    year_pattern = '.*?([0-9]{4}).*'
    # Matches "17th century", "18th c." and similar.
    century_pattern = '.*[^0-9]?([0-9]{2}th c).*'

    found = re.match(year_pattern, datestr)
    if found is not None:
        return int(found.group(1))

    if not match_century:
        return None

    found = re.match(century_pattern, datestr)
    if found is None:
        return None

    # group(1) looks like "18th c"; its first two characters are the
    # century number.  17th century -> 1650.
    ordinal = found.group(1)
    return int(ordinal[:2]) * 100 - 50
def get_poly_s(coefs):
    """Format polynomial coefficients as a human-readable equation.

    @coefs lists the coefficients from the highest power down to the
    constant term.  Terms whose coefficient is zero are left out and every
    coefficient is printed with two decimals:

    >>> from utils import get_poly_s
    >>> get_poly_s([0.5,0,4])
    'y = 0.50x^2 + 4.00'
    >>> get_poly_s([1,2,3,4])
    'y = 1.00x^3 + 2.00x^2 + 3.00x + 4.00'
    """
    def render(power, coef):
        # "c" for the constant term, "cx" for the linear one, "cx^p" above.
        text = "%.2f" % coef
        if power >= 1:
            text += 'x'
        if power >= 2:
            text += '^%d' % power
        return text

    degree = len(coefs) - 1
    terms = [render(degree - position, coef)
             for position, coef in enumerate(coefs)
             if coef]
    return 'y = ' + ' + '.join(terms)
88 """
90 class ReprWrapper(object):
91 def __init__(self, repr_f, f):
92 self.repr_f = repr_f
93 self.f = f
94 functools.update_wrapper(self, f)
95 def __call__(self, *args, **kwargs):
96 return self.f(*args, **kwargs)
97 def __repr__(self):
98 return self.repr_f(self.f)
99 """
def repr_origin(f):
    """Return the string ``"<origin>.<name>"`` for @f.

    The origin is ``f.im_class`` when @f has that attribute (as Python 2
    method objects do) and ``f.__module__`` otherwise; the name is
    ``f.__name__``.
    """
    origin = f.im_class if hasattr(f, 'im_class') else f.__module__
    return "%s.%s" % (origin, f.__name__)
def head(f, n=10):
    """Print the file name of @f followed by the first @n lines of that file.

    @f is any object with a ``filename`` attribute naming a readable text
    file.  At most @n lines are read; the rest of the file is not touched.
    Nothing is returned.
    """
    print(f.filename)
    with open(f.filename, 'r') as fin:
        for line in itertools.islice(fin, n):
            # Each line still carries its own terminator; strip it so the
            # output is not double-spaced by print's newline.
            print(line.rstrip('\n'))
def iter_splits(l, parts=None, max_size=None, min_size=None):
    """Yield consecutive sublists of the @l list, trying to produce
    evenly sized sublists.  Exactly one of the parameters @parts,
    @max_size or @min_size must be specified (and nonzero).

    Specifying parts = N yields N sublists of (almost) even size.  The
    sizes differ by at most 1, the longer sublists coming first.

    >>> list(iter_splits(list(range(5)), parts=2))
    [[0, 1, 2], [3, 4]]
    >>> list(iter_splits(list(range(5)), parts=4))
    [[0, 1], [2], [3], [4]]

    Specifying max_size = N yields the smallest possible number of
    consecutive sublists so that the whole list is divided and the size of
    each part is <= N.

    >>> list(iter_splits(list(range(5)), max_size=3))
    [[0, 1, 2], [3, 4]]
    >>> list(iter_splits(list(range(5)), max_size=10))
    [[0, 1, 2, 3, 4]]

    Calling iter_splits(l, max_size=N) is just a shorthand for calling
    iter_splits(l, parts=len(l) // N + bool(len(l) % N))

    Similarly min_size = N yields the largest possible number of
    consecutive sublists so that the whole list is divided and the size of
    each part is >= N.

    Calling iter_splits(l, min_size=N) is just a shorthand for calling
    iter_splits(l, parts=len(l) // N)

    This is a generator, so the errors below are raised when iteration
    starts, not when the function is called.

    Raises:
        TypeError: unless exactly one of the three size arguments is
            given and nonzero.
        ValueError: if @l has fewer elements than the number of parts
            asked for, or if the requested max_size / min_size cannot be
            satisfied (empty @l, or min_size larger than len(l)).
    """
    if bool(parts) + bool(max_size) + bool(min_size) != 1:
        raise TypeError('Exactly one of parts, max_size or min_size arguments must be specified (and nonzero)')

    total = len(l)
    if max_size:
        # Ceiling division: one extra part for the remainder, if any.
        parts = total // max_size + bool(total % max_size)
    elif min_size:
        parts = total // min_size

    if not parts:
        # Only reachable via max_size / min_size (parts itself is nonzero
        # when given directly).
        raise ValueError("Cannot split %d element(s) with the requested part size." % total)

    base_size, extra = divmod(total, parts)
    if base_size == 0:
        raise ValueError("Number of parts to split must not be larger than the number of elements.")

    stop = 0
    for index in range(parts):
        # The first `extra` parts take one additional element each.
        size = base_size + 1 if index < extra else base_size
        start, stop = stop, stop + size
        yield l[start:stop]
def iter_exact_splits(l, split_size):
    """Yield consecutive slices of @l that are exactly @split_size long.

    Slices are taken from the start of @l.  A trailing remainder shorter
    than @split_size is dropped, so nothing is yielded when @l has fewer
    than @split_size elements.  The slices are taken from a shallow copy
    of @l made when iteration starts.

    This is a generator, so the error below is raised when iteration
    starts, not when the function is called.

    Raises:
        ValueError: if @split_size is not positive (such a value could
            never produce a chunk and previously looped forever).
    """
    if split_size <= 0:
        raise ValueError("split_size must be a positive integer.")

    snapshot = copy.copy(l)
    # Only whole chunks are produced; the remainder is ignored.
    full_chunks = len(snapshot) // split_size
    for index in range(full_chunks):
        start = index * split_size
        yield snapshot[start:start + split_size]
def pearson_coef(vec1, vec2):
    """Return the Pearson correlation coefficient of @vec1 and @vec2.

    Both arguments must expose ``shape``, ``mean()`` and element-wise
    arithmetic (e.g. numpy arrays) and must have the same shape; this is
    checked with an assert.  Each vector is centred on its mean, and the
    sum of the element-wise products is divided by the product of the
    two Euclidean norms.
    """
    assert vec1.shape == vec2.shape

    # Deviations of each vector from its own mean.
    dev1 = vec1 - vec1.mean()
    dev2 = vec2 - vec2.mean()

    cross = (dev1 * dev2).sum()
    length1 = numpy.sqrt((dev1 * dev1).sum())
    length2 = numpy.sqrt((dev2 * dev2).sum())
    return cross / (length1 * length2)
if __name__ == '__main__':
    def test_split():
        """Self-check for iter_splits on a 20-element list.

        For each of the three size arguments and each value 10..19, the
        split is printed and checked: the parts concatenate back to the
        input, and the part count / maximum size / minimum size honours
        the argument used.
        """
        l = list(range(20))

        for kw in ['parts', 'max_size', 'min_size']:
            for val in range(10, 20):
                print("iter_splits(%s, **{%s : %s}))" % (l, kw, val))
                res = list(iter_splits(l, **{kw: val}))
                sizes = [len(part) for part in res]
                print("%s = %s" % (kw, val))
                print(" len =  %s , max(size) =  %s , min(size) =  %s"
                      % (len(res), max(sizes), min(sizes)))
                print("  %s" % (res,))

                assert list(itertools.chain.from_iterable(res)) == l
                if kw == 'parts':
                    assert len(res) == val
                if kw == 'max_size':
                    assert max(sizes) <= val
                if kw == 'min_size':
                    assert min(sizes) >= val

    # The previous entry point called get_random_output_base(0, 1), a name
    # that is neither defined nor imported in this module, so running the
    # file raised NameError.  Run the self-check defined above instead.
    test_split()