# -*- encoding: utf-8 -*-
#
#
# Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
# Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
# Copyright (C) 2002-2006 André Wobst <wobsta@users.sourceforge.net>
#
# This file is part of PyX (http://pyx.sourceforge.net/).
#
# PyX is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# PyX is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PyX; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

import math, re, ConfigParser, struct, warnings
from pyx import text
from pyx.style import linestyle
from pyx.graph import style


def splitatvalue(value, *splitpoints):
    section = 0
    while section < len(splitpoints) and splitpoints[section] < value:
        section += 1
    if len(splitpoints) > 1:
        if section % 2:
            section = None
        else:
            section >>= 1
    return (section, value)
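
# Illustrative sketch (not part of the original module): splitatvalue maps a
# value onto a section index, where consecutive pairs of splitpoints mark
# excluded ranges (the values below were chosen for illustration only):
#     splitatvalue(0.5, 1)    -> (0, 0.5)     below the single splitpoint
#     splitatvalue(1.5, 1)    -> (1, 1.5)     above the single splitpoint
#     splitatvalue(1, 2, 8)   -> (0, 1)       left of the excluded range 2..8
#     splitatvalue(5, 2, 8)   -> (None, 5)    inside the excluded range
#     splitatvalue(9, 2, 8)   -> (1, 9)       right of the excluded range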


_mathglobals = {"neg": lambda x: -x,
                "abs": lambda x: x < 0 and -x or x,
                "sgn": lambda x: x < 0 and -1 or 1,
                "sqrt": math.sqrt,
                "exp": math.exp,
                "log": math.log,
                "sin": math.sin,
                "cos": math.cos,
                "tan": math.tan,
                "asin": math.asin,
                "acos": math.acos,
                "atan": math.atan,
                "sind": lambda x: math.sin(math.pi/180*x),
                "cosd": lambda x: math.cos(math.pi/180*x),
                "tand": lambda x: math.tan(math.pi/180*x),
                "asind": lambda x: 180/math.pi*math.asin(x),
                "acosd": lambda x: 180/math.pi*math.acos(x),
                "atand": lambda x: 180/math.pi*math.atan(x),
                "norm": lambda x, y: math.hypot(x, y),
                "splitatvalue": splitatvalue,
                "pi": math.pi,
                "e": math.e}
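
# Illustrative sketch (not part of the original module): these names form the
# globals available to the expression-based data classes below, e.g. in user
# code (assuming the usual "from pyx import graph" import; the expressions and
# the data instance d are hypothetical):
#     graph.data.function("y(x)=exp(-x/100)*sind(x)", min=0, max=720)
#     graph.data.data(d, r="norm(x, y)", phi="atand(y/x)")
# where d is assumed to provide columns named x and y.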


class _data:
    """graph data interface

    Graph data consists of columns, where each column might be identified by a
    string or an integer. Each row in the resulting table refers to a data
    point.

    All methods except for the constructor should consider self and its
    attributes to be readonly, since the data instance might be shared between
    several graphs simultaneously.

    The instance variable columns is a dictionary mapping column names to the
    data of the column (i.e. to a list). Only static columns (known at
    construction time) are contained in that dictionary. For data with numbered
    columns the column data is also available via the list columndata.
    Otherwise the columndata list should be missing and an access to a column
    number will fail.

    The names of all columns (static and dynamic) must be fixed in the
    constructor and stated in the columnnames list.

    The instance variables title and defaultstyles contain the data title and
    the default styles (a list of styles), respectively.
    """

    def dynamiccolumns(self, graph):
        """create and return dynamic columns data

        Returns dynamic data matching the given axes (the axes range and other
        data might be used). The return value is a dictionary similar to the
        columns instance variable.
        """
        return {}


defaultsymbols = [style.symbol()]
defaultlines = [style.line()]


class values(_data):

    defaultstyles = defaultsymbols

    def __init__(self, title="user provided values", **columns):
        for i, values in enumerate(columns.values()):
            if i and len(values) != l:
                raise ValueError("different number of values")
            else:
                l = len(values)
        self.columns = columns
        self.columnnames = columns.keys()
        self.title = title
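
# A minimal usage sketch (not part of the original module); column names and
# values are arbitrary:
#     d = graph.data.values(x=[1, 2, 3], y=[0.1, 0.4, 0.9])
# All value lists must have the same length, otherwise ValueError is raised.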


class points(_data):
    "Graph data from a list of points"

    defaultstyles = defaultsymbols

    def __init__(self, points, title="user provided points", addlinenumbers=1, **columns):
        if len(points):
            l = len(points[0])
            self.columndata = [[x] for x in points[0]]
            for point in points[1:]:
                if l != len(point):
                    raise ValueError("different number of columns per point")
                for i, x in enumerate(point):
                    self.columndata[i].append(x)
            for v in columns.values():
                if abs(v) > l or (not addlinenumbers and abs(v) == l):
                    raise ValueError("column number bigger than number of columns")
            if addlinenumbers:
                self.columndata = [range(1, len(points) + 1)] + self.columndata
            self.columns = dict([(key, self.columndata[i]) for key, i in columns.items()])
        else:
            self.columns = dict([(key, []) for key, i in columns.items()])
        self.columnnames = self.columns.keys()
        self.title = title
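
# A minimal usage sketch (not part of the original module); since line numbers
# are prepended by default (addlinenumbers=1), the point coordinates are
# addressed by 1-based column numbers:
#     d = graph.data.points([(1, 0.1), (2, 0.4), (3, 0.9)], x=1, y=2)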


def list(*args, **kwargs):
    warnings.warn("graph.data.list is deprecated. Use graph.data.points instead.")
    return points(*args, **kwargs)


class _notitle:
    pass


_columnintref = re.compile(r"\$(-?\d+)", re.IGNORECASE)


class data(_data):
    "creates a new data set out of an existing data set"

    def __init__(self, data, title=_notitle, context={}, copy=1,
                       replacedollar=1, columncallback="__column__", **columns):
        # build a nice title
        if title is _notitle:
            items = columns.items()
            items.sort() # we want sorted items (otherwise they would be scrambled unpredictably)
            self.title = "%s: %s" % (text.escapestring(data.title or "unknown source"),
                                     ", ".join(["%s=%s" % (text.escapestring(key),
                                                           text.escapestring(str(value)))
                                                for key, value in items]))
        else:
            self.title = title

        self.orgdata = data
        self.defaultstyles = self.orgdata.defaultstyles

        # analyse the **columns argument
        self.columns = {}
        for columnname, value in columns.items():
            # search in the columns dictionary
            try:
                self.columns[columnname] = self.orgdata.columns[value]
            except KeyError:
                # search in the columndata list
                try:
                    self.columns[columnname] = self.orgdata.columndata[value]
                except (AttributeError, TypeError):
                    # value was not a valid column identifier,
                    # i.e. take it as a mathematical expression
                    if replacedollar:
                        m = _columnintref.search(value)
                        while m:
                            value = "%s%s(%s)%s" % (value[:m.start()], columncallback, m.groups()[0], value[m.end():])
                            m = _columnintref.search(value)
                        value = value.replace("$", columncallback)
                    expression = compile(value.strip(), __file__, "eval")
                    context = context.copy()
                    context[columncallback] = self.columncallback
                    if self.orgdata.columns:
                        key, columndata = self.orgdata.columns.items()[0]
                        count = len(columndata)
                    elif self.orgdata.columndata:
                        count = len(self.orgdata.columndata[0])
                    else:
                        count = 0
                    newdata = []
                    for i in xrange(count):
                        self.columncallbackcount = i
                        for key, values in self.orgdata.columns.items():
                            context[key] = values[i]
                        try:
                            newdata.append(eval(expression, _mathglobals, context))
                        except (ArithmeticError, ValueError):
                            newdata.append(None)
                    self.columns[columnname] = newdata

        if copy:
            # copy other, non-conflicting column names
            for columnname, columndata in self.orgdata.columns.items():
                if not self.columns.has_key(columnname):
                    self.columns[columnname] = columndata

        self.columnnames = self.columns.keys()

    def columncallback(self, value):
        try:
            return self.orgdata.columndata[value][self.columncallbackcount]
        except:
            return self.orgdata.columns[value][self.columncallbackcount]
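
# A minimal usage sketch (not part of the original module); here the original
# data is a points instance, so numbered columns are available:
#     d = graph.data.points([(1, 0.2), (2, 0.8), (3, 0.5)], x=1, y=2)
#     d2 = graph.data.data(d, x=1, y="2*sin(pi*$2)", title="rescaled")
# A column keyword value may be a column name of the original data, a column
# number (resolved via columndata), or a mathematical expression in which $n
# refers to column n, named columns are available as variables, and the
# helpers from _mathglobals may be used.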


filecache = {}


class file(data):

    defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
    defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
    defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")

    def splitline(self, line, stringpattern, columnpattern, tofloat=1):
        """returns a list created out of the string line
        - matches stringpattern and columnpattern, adds the first group of that
          match to the result and removes those matches until the line is empty
        - when stringpattern matched, the result is always kept as a string
        - when columnpattern matched and tofloat is true, a conversion to a float
          is tried; when this conversion fails, the string is kept"""
        result = []
        # try to gain speed by skipping the regular expression matching where possible
        if line.find('"')!=-1 or \
           stringpattern is not self.defaultstringpattern or \
           columnpattern is not self.defaultcolumnpattern:
            while len(line):
                match = stringpattern.match(line)
                if match:
                    result.append(match.groups()[0])
                    line = line[match.end():]
                else:
                    match = columnpattern.match(line)
                    if tofloat:
                        try:
                            result.append(float(match.groups()[0]))
                        except (TypeError, ValueError):
                            result.append(match.groups()[0])
                    else:
                        result.append(match.groups()[0])
                    line = line[match.end():]
        else:
            if tofloat:
                try:
                    return map(float, line.split())
                except (TypeError, ValueError):
                    result = []
                    for r in line.split():
                        try:
                            result.append(float(r))
                        except (TypeError, ValueError):
                            result.append(r)
            else:
                return line.split()
        return result
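
    # Illustrative sketch (not part of the original module) of what splitline
    # returns for the default patterns (the input line is hypothetical):
    #     self.splitline('1.2 "hello world" 4',
    #                    self.defaultstringpattern, self.defaultcolumnpattern)
    #     -> [1.2, 'hello world', 4.0]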

    def getcachekey(self, *args):
        return ":".join([str(x) for x in args])

    def __init__(self, filename,
                       commentpattern=defaultcommentpattern,
                       stringpattern=defaultstringpattern,
                       columnpattern=defaultcolumnpattern,
                       skiphead=0, skiptail=0, every=1,
                       **kwargs):

        def readfile(file, title, self=self, commentpattern=commentpattern, stringpattern=stringpattern, columnpattern=columnpattern, skiphead=skiphead, skiptail=skiptail, every=every):
            columns = []
            columndata = []
            linenumber = 0
            maxcolumns = 0
            for line in file.readlines():
                line = line.strip()
                match = commentpattern.match(line)
                if match:
                    if not len(columndata):
                        columns = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
                else:
                    linedata = []
                    for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
                        linedata.append(value)
                    if len(linedata):
                        if linenumber >= skiphead and not ((linenumber - skiphead) % every):
                            linedata = [linenumber + 1] + linedata
                            if len(linedata) > maxcolumns:
                                maxcolumns = len(linedata)
                            columndata.append(linedata)
                        linenumber += 1
            if skiptail >= every:
                skip, x = divmod(skiptail, every)
                del columndata[-skip:]
            for i in xrange(len(columndata)):
                if len(columndata[i]) != maxcolumns:
                    columndata[i].extend([None]*(maxcolumns-len(columndata[i])))
            return points(columndata, title=title, addlinenumbers=0,
                          **dict([(column, i+1) for i, column in enumerate(columns[:maxcolumns-1])]))

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
            if not filecache.has_key(cachekey):
                filecache[cachekey] = readfile(open(filename), filename)
            data.__init__(self, filecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
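
# A minimal usage sketch (not part of the original module); the file name and
# column names are hypothetical. Columns may be addressed by number or, when
# the first comment line of the file names the columns, by those names:
#     d = graph.data.file("input.dat", x=1, y=2)
#     d = graph.data.file("input.dat", x="time", y="value")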


conffilecache = {}


class conffile(data):

    def __init__(self, filename, **kwargs):
        """read data from a config-like file
        - filename is a string
        - each row is defined by a section in the config-like file (see
          config module description)
        - the columns for each row are defined by lines in the section file;
          the option entries identify and name the columns
        - further keyword arguments are passed to the constructor of data,
          keyword arguments data and title excluded"""

        def readfile(file, title):
            config = ConfigParser.ConfigParser()
            config.optionxform = str
            config.readfp(file)
            sections = config.sections()
            sections.sort()
            columndata = [None]*len(sections)
            maxcolumns = 1
            columns = {}
            for i in xrange(len(sections)):
                point = [sections[i]] + [None]*(maxcolumns-1)
                for option in config.options(sections[i]):
                    value = config.get(sections[i], option)
                    try:
                        value = float(value)
                    except:
                        pass
                    try:
                        index = columns[option]
                    except KeyError:
                        columns[option] = maxcolumns
                        point.append(value)
                        maxcolumns += 1
                    else:
                        point[index] = value
                columndata[i] = point
            # wrap result into a data instance to remove column numbers
            result = data(points(columndata, addlinenumbers=0, **columns), title=title)
            # ... but reinsert sections as linenumbers
            result.columndata = [[x[0] for x in columndata]]
            return result

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            if not conffilecache.has_key(filename):
                conffilecache[filename] = readfile(open(filename), filename)
            data.__init__(self, conffilecache[filename], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
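
# A minimal usage sketch (not part of the original module); the file name and
# its contents are hypothetical. Each section becomes a row and each option
# becomes a named column:
#     data.cfg:
#         [point1]
#         x = 1
#         y = 2.5
#         [point2]
#         x = 2
#         y = 3.5
#     d = graph.data.conffile("data.cfg")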


cbdfilecache = {}


class cbdfile(data):

    defaultstyles = defaultlines

    def getcachekey(self, *args):
        return ":".join([str(x) for x in args])

    def __init__(self, filename, minrank=None, maxrank=None, **kwargs):

        class cbdhead:

            def __init__(self, file):
                (self.magic,
                 self.dictaddr,
                 self.segcount,
                 self.segsize,
                 self.segmax,
                 self.fill) = struct.unpack("<5i20s", file.read(40))
                if self.magic != 0x20770002:
                    raise ValueError("bad magic number")

        class segdict:

            def __init__(self, file, i):
                self.index = i
                (self.segid,
                 self.maxlat,
                 self.minlat,
                 self.maxlong,
                 self.minlong,
                 self.absaddr,
                 self.nbytes,
                 self.rank) = struct.unpack("<6i2h", file.read(28))

        class segment:

            def __init__(self, file, sd):
                file.seek(sd.absaddr)
                (self.orgx,
                 self.orgy,
                 self.id,
                 self.nstrokes,
                 self.dummy) = struct.unpack("<3i2h", file.read(16))
                oln, olt = self.orgx, self.orgy
                self.points = [(olt, oln)]
                for i in range(self.nstrokes):
                    c1, c2 = struct.unpack("2c", file.read(2))
                    if ord(c2) & 0x40:
                        if c1 > "\177":
                            dy = ord(c1) - 256
                        else:
                            dy = ord(c1)
                        if c2 > "\177":
                            dx = ord(c2) - 256
                        else:
                            dx = ord(c2) - 64
                    else:
                        c3, c4, c5, c6, c7, c8 = struct.unpack("6c", file.read(6))
                        if c2 > "\177":
                            c2 = chr(ord(c2) | 0x40)
                        dx, dy = struct.unpack("<2i", c3+c4+c1+c2+c7+c8+c5+c6)
                    oln += dx
                    olt += dy
                    self.points.append((olt, oln))
                sd.nstrokes = self.nstrokes

        def readfile(file, title):
            h = cbdhead(file)
            file.seek(h.dictaddr)
            sds = [segdict(file, i+1) for i in range(h.segcount)]
            sbs = [segment(file, sd) for sd in sds]

            # remove jumps at long +/- 180
            for sd, sb in zip(sds, sbs):
                if sd.minlong < -150*3600 and sd.maxlong > 150*3600:
                    for i, (lat, long) in enumerate(sb.points):
                        if long < 0:
                            sb.points[i] = lat, long + 360*3600

            columndata = []
            for sd, sb in zip(sds, sbs):
                if ((minrank is None or sd.rank >= minrank) and
                    (maxrank is None or sd.rank <= maxrank)):
                    if columndata:
                        columndata.append((None, None))
                    columndata.extend([(long/3600.0, lat/3600.0)
                                       for lat, long in sb.points])

            result = points(columndata, title=title)
            result.defaultstyles = self.defaultstyles
            return result

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, minrank, maxrank)
            if not cbdfilecache.has_key(cachekey):
                cbdfilecache[cachekey] = readfile(open(filename, "rb"), filename)
            data.__init__(self, cbdfilecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
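
# A minimal usage sketch (not part of the original module); the file name and
# rank values are hypothetical. The resulting numbered columns 1 and 2 hold
# longitude and latitude in degrees, with (None, None) rows separating the
# segments, so they can be mapped to named columns via the keyword arguments:
#     d = graph.data.cbdfile("coastlines.cbd", minrank=1, maxrank=4, x=1, y=2)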


class function(_data):

    defaultstyles = defaultlines

    assignmentpattern = re.compile(r"\s*([a-z_][a-z0-9_]*)\s*\(\s*([a-z_][a-z0-9_]*)\s*\)\s*=", re.IGNORECASE)

    def __init__(self, expression, title=_notitle, min=None, max=None,
                       points=100, context={}):

        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        self.min = min
        self.max = max
        self.numberofpoints = points
        self.context = context.copy() # be safe on late evaluations
        m = self.assignmentpattern.match(expression)
        if m:
            self.yname, self.xname = m.groups()
            expression = expression[m.end():]
        else:
            raise ValueError("y(x)=... or similar expected")
        if context.has_key(self.xname):
            raise ValueError("xname in context")
        self.expression = compile(expression.strip(), __file__, "eval")
        self.columns = {}
        self.columnnames = [self.xname, self.yname]

    def dynamiccolumns(self, graph):
        dynamiccolumns = {self.xname: [], self.yname: []}

        xaxis = graph.axes[self.xname]
        from pyx.graph.axis import logarithmic
        logaxis = isinstance(xaxis.axis, logarithmic)
        if self.min is not None:
            min = self.min
        else:
            min = xaxis.data.min
        if self.max is not None:
            max = self.max
        else:
            max = xaxis.data.max
        if logaxis:
            min = math.log(min)
            max = math.log(max)
        for i in range(self.numberofpoints):
            x = min + (max-min)*i / (self.numberofpoints-1.0)
            if logaxis:
                x = math.exp(x)
            dynamiccolumns[self.xname].append(x)
            self.context[self.xname] = x
            try:
                y = eval(self.expression, _mathglobals, self.context)
            except (ArithmeticError, ValueError):
                y = None
            dynamiccolumns[self.yname].append(y)
        return dynamiccolumns
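
# A minimal usage sketch (not part of the original module); the expression is
# arbitrary. When min and max are omitted, the range is taken from the graph's
# x axis at plotting time:
#     d = graph.data.function("y(x)=sin(x)/x", min=1, max=10)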


class functionxy(function):

    def __init__(self, f, min=None, max=None, **kwargs):
        function.__init__(self, "y(x)=f(x)", context={"f": f}, min=min, max=max, **kwargs)
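
# A minimal usage sketch (not part of the original module); the callable is
# arbitrary and is evaluated via the expression "y(x)=f(x)":
#     d = graph.data.functionxy(lambda x: x**3 - x, min=-1.5, max=1.5)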


class paramfunction(_data):

    defaultstyles = defaultlines

    def __init__(self, varname, min, max, expression, title=_notitle, points=100, context={}):
        if context.has_key(varname):
            raise ValueError("varname in context")
        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        varlist, expression = expression.split("=")
        expression = compile(expression.strip(), __file__, "eval")
        keys = [key.strip() for key in varlist.split(",")]
        self.columns = dict([(key, []) for key in keys])
        context = context.copy()
        for i in range(points):
            param = min + (max-min)*i / (points-1.0)
            context[varname] = param
            values = eval(expression, _mathglobals, context)
            for key, value in zip(keys, values):
                self.columns[key].append(value)
            if len(keys) != len(values):
                raise ValueError("unpack tuple of wrong size")
        self.columnnames = self.columns.keys()
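
# A minimal usage sketch (not part of the original module); the parameter name,
# range and expression are arbitrary (pi is provided by _mathglobals):
#     d = graph.data.paramfunction("k", 0, 2*pi, "x, y = cos(3*k), sin(5*k)")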


class paramfunctionxy(paramfunction):

    def __init__(self, f, min, max, **kwargs):
        paramfunction.__init__(self, "t", min, max, "x, y = f(t)", context={"f": f}, **kwargs)
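
# A minimal usage sketch (not part of the original module); the callable must
# return an (x, y) tuple for each parameter value:
#     import math
#     d = graph.data.paramfunctionxy(lambda t: (t*math.cos(t), t*math.sin(t)), 0, 10)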