utils/utils.py

   1 import re
   2 import subprocess
   3 import os
   4 import copy
   5 from os.path import abspath, exists
   6 import shutil
   7 import functools
   8 import inspect
   9 import itertools
  10 import numpy
  11
  12 import misc
  13 from colors import BlackWhite
  14 import types
  15
  16 VIEWER_LIST=['qgo', 'kombilo']
  17
  18 def viewer_open(sgf_filename, executable=VIEWER_LIST[1]):
  19     p = subprocess.Popen([executable, sgf_filename])
  20     return
  21
  22 def bark():
  23     subprocess.call('bark', shell=True)
  24
  25 def check_output(*args,  **kwargs):
  26     if hasattr(subprocess, 'check_output'):
  27         return subprocess.check_output(*args, **kwargs)
  28     else:
  29         if 'stdout' in kwargs:
  30             raise ValueError('stdout argument not allowed, it will be overridden.')
  31         process = subprocess.Popen(stdout=subprocess.PIPE, *args, **kwargs)
  32         output, unused_err = process.communicate()
  33         retcode = process.poll()
  34         if retcode:
  35             cmd = kwargs.get("args")
  36             if cmd is None:
  37                 cmd = args[0]
  38             raise subprocess.CalledProcessError(retcode, cmd, output=output)
  39         return output
  40
  41 def get_year(datestr,  match_century=True):
  42     """Function trying to extract date from a string - usually a DT field of a sgf_file.
  43     First, look for the first year string in the @datestr.
  44     if not found and the @match_century is true, we look for strings like
  45     "18th century", which results in year 1750 (the mean of years in 18th century)
  46
  47     Returns None if no year found.
  48     """
  49     # 1982-10-10
  50     # note the non-greedy .*? expansion => the first date string in the result gets matched
  51     # that is get_year("1982 1999") = 1982
  52     match = re.match( '.*?([0-9]{4}).*', datestr)
  53     if match:
  54             return int(match.group(1))
  55
  56     if match_century:
  57         # 17th century, 18th c.
  58         match = re.match( '.*[^0-9]?([0-9]{2}th c).*', datestr)
  59         if match:
  60             century = int(match.group(1)[:2])
  61             # returns "mean" year of the century:
  62             # 17th century -> 1650
  63             return century * 100 - 50
  64
  65     return None
  66
  67 def get_poly_s(coefs):
  68     """Returns a string with polynomial equation; e.g.:
  69
  70     >>> from utils import get_poly_s
  71     >>> get_poly_s([0.5,0,4])
  72     'y = 0.50x^2 + 4.00'
  73     >>> get_poly_s([1,2,3,4])
  74     'y = 1.00x^3 + 2.00x^2 + 3.00x + 4.00'
  75     """
  76     C = []
  77     for pw, co in enumerate(reversed(coefs)):
  78         if co:
  79             s = "%.2f" % co
  80             if pw:
  81                 s += 'x'
  82                 if pw > 1:
  83                     s += '^%d' % pw
  84             C.append(s)
  85     return 'y = ' + ' + '.join(reversed(C))
  86
  87
# NOTE: the ReprWrapper class below is disabled -- it sits inside a bare
# module-level string literal, so it is never defined when the module loads.
"""

class ReprWrapper(object):
    def __init__(self, repr_f, f):
        self.repr_f = repr_f
        self.f = f
        functools.update_wrapper(self, f)
    def __call__(self, *args, **kwargs):
        return self.f(*args, **kwargs)
    def __repr__(self):
        return self.repr_f(self.f)
"""
 100
 101 def repr_origin(f):
 102     if hasattr(f, 'im_class'):
 103         prefix = f.im_class
 104     else:
 105         prefix = f.__module__
 106     return "%s.%s"%(prefix, f.__name__)
 107
 108 def head(f, n=10):
 109     print f.filename
 110     with open(f.filename, 'r') as fin:
 111         for line in itertools.islice(fin, n):
 112             print line
 113
 114 def iter_splits(l, parts=None, max_size=None, min_size=None):
 115     """Will yield consequent sublist of the @l list, trying to result
 116     evenly sized sublists.  Exactly one of the parameters @parts or
 117     @max_size or @min_size must be specified.
 118
 119     specifiing parts = N will yield N sublists of (almost) even size. The
 120     list size difference is guaranted to be at most 1.
 121
 122     >>> list(iter_splits(range(5), parts=2))
 123     [[0, 1, 2], [3, 4]]
 124     >>> list(iter_splits(range(5), parts=4))
 125     [[0, 1], [2], [3], [4]]
 126
 127
 128
 129     specifiing max_size = N returns the smallest possible number of
 130     consequent sublists so that whole list is divided and size of each
 131     part is <= N
 132
 133     >>> list(iter_splits(range(5), max_size=3))
 134     [[0, 1, 2], [3, 4]]
 135     >>> list(iter_splits(range(5), max_size=10))
 136     [[0, 1, 2, 3, 4]]
 137
 138     Calling iter_splits(l, max_size=N) is just a shorthand for calling
 139     iter_splits(l, parts=len(l) / N + bool(len(l)% N) )
 140
 141
 142
 143     Similarly min_size = N returns the largest possible number of
 144     consequent sublists so that whole list is divided and size of each
 145     part is >= N
 146
 147     Calling iter_splits(l, min_size=N) is just a shorthand for calling
 148     iter_splits(l, parts=len(l) / N )
 149     """
 150     if bool(parts) + bool(max_size) + bool( min_size) != 1:
 151         raise TypeError('Exactly one of parts, max_size or exact_size arguments must be specified (and nonzero)')
 152
 153     if parts:
 154         print parts
 155         pn, rest = len(l) / parts, len(l) % parts
 156         if pn == 0:
 157             raise ValueError("Number of parts to split must not be larger than the number of elements.")
 158
 159         def sizes(pn, rest):
 160             for i in xrange(parts):
 161                 if rest:
 162                     yield pn + 1
 163                     rest -= 1
 164                 else:
 165                     yield pn
 166
 167         stop = 0
 168         for size in sizes(pn, rest):
 169             start, stop = stop, stop + size
 170             yield l[start: stop]
 171
 172     if max_size:
 173         pn, rest = len(l) / max_size, len(l) % max_size
 174         if rest:
 175             pn += 1
 176         for split in iter_splits(l, parts=pn):
 177             yield split
 178
 179     if min_size:
 180         for split in iter_splits(l, parts=len(l)/min_size):
 181             yield split
 182
 183 def iter_exact_splits(l, split_size):
 184     tail = copy.copy(l)
 185
 186     while tail:
 187         head, tail = tail[:split_size], tail[split_size:]
 188         # the last head could be shorter
 189         if len(head) == split_size:
 190             yield head
 191
 192 def pearson_coef(vec1, vec2):
 193     assert vec1.shape == vec2.shape
 194     def norm(vec):
 195         return numpy.sqrt((vec*vec).sum())
 196     def center(vec):
 197         return vec - vec.mean()
 198     vec1, vec2 = center(vec1), center(vec2)
 199     return (vec1 * vec2).sum() / (norm(vec1) * norm(vec2))
 200
 201
 202 if __name__ == '__main__':
 203     def test_split():
 204         l = range(20)
 205
 206         for kw in ['parts', 'max_size',  'min_size']:
 207             for val in  range(10, 20):
 208                 print "iter_splits(%s, **{%s : %s}))" % (l,  kw,  val)
 209                 res = list(iter_splits(l, **{kw : val}))
 210                 print kw, "=", val
 211                 print "   len = ", len(res), ", max(size) = ", max(map(len, res)), ", min(size) = ", min(map(len, res))
 212                 print "   ", res
 213
 214                 assert list(itertools.chain.from_iterable(res)) == l
 215                 if kw == 'parts':
 216                     assert len(res) == val
 217                 if kw == 'max_size':
 218                     assert max(map(len, res)) <= val
 219                 if kw == 'min_size':
 220                     assert min(map(len, res)) >= val
 221
 222     #test_partial()
 223     #test_split()
 224
 225     get_random_output_base(0, 1)