pyx/graph/data.py

   1 #!/usr/bin/env python
   2 # -*- coding: ISO-8859-1 -*-
   3 #
   4 #
   5 # Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
   6 # Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
   7 # Copyright (C) 2002-2005 André Wobst <wobsta@users.sourceforge.net>
   8 #
   9 # This file is part of PyX (http://pyx.sourceforge.net/).
  10 #
  11 # PyX is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15 #
  16 # PyX is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20 #
  21 # You should have received a copy of the GNU General Public License
  22 # along with PyX; if not, write to the Free Software
  23 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
  24
  25 from __future__ import nested_scopes
  26
  27 import math, re, ConfigParser, struct, warnings
  28 from pyx import text
  29 from pyx.style import linestyle
  30 from pyx.graph import style
  31
  32 try:
  33     enumerate([])
  34 except NameError:
  35     # fallback implementation for Python 2.2 and below
  36     def enumerate(list):
  37         return zip(xrange(len(list)), list)
  38
  39 try:
  40     dict()
  41 except NameError:
  42     # fallback implementation for Python 2.1
  43     def dict(items):
  44         result = {}
  45         for key, value in items:
  46             result[key] = value
  47         return result
  48
  49
  50 def splitatvalue(value, *splitpoints):
  51     section = 0
  52     while section < len(splitpoints) and splitpoints[section] < value:
  53         section += 1
  54     if len(splitpoints) > 1:
  55         if section % 2:
  56             section = None
  57         else:
  58             section >>= 1
  59     return (section, value)
  60
  61
  62 _mathglobals = {"neg": lambda x: -x,
  63                 "abs": lambda x: x < 0 and -x or x,
  64                 "sgn": lambda x: x < 0 and -1 or 1,
  65                 "sqrt": math.sqrt,
  66                 "exp": math.exp,
  67                 "log": math.log,
  68                 "sin": math.sin,
  69                 "cos": math.cos,
  70                 "tan": math.tan,
  71                 "asin": math.asin,
  72                 "acos": math.acos,
  73                 "atan": math.atan,
  74                 "sind": lambda x: math.sin(math.pi/180*x),
  75                 "cosd": lambda x: math.cos(math.pi/180*x),
  76                 "tand": lambda x: math.tan(math.pi/180*x),
  77                 "asind": lambda x: 180/math.pi*math.asin(x),
  78                 "acosd": lambda x: 180/math.pi*math.acos(x),
  79                 "atand": lambda x: 180/math.pi*math.atan(x),
  80                 "norm": lambda x, y: math.hypot(x, y),
  81                 "splitatvalue": splitatvalue,
  82                 "pi": math.pi,
  83                 "e": math.e}
  84
  85
  86 class _data:
  87     """graph data interface
  88
  89     Graph data consists in columns, where each column might be identified by a
  90     string or an integer. Each row in the resulting table refers to a data
  91     point.
  92
  93     All methods except for the constructor should consider self and its
  94     attributes to be readonly, since the data instance might be shared between
  95     several graphs simultaniously.
  96
  97     The instance variable columns is a dictionary mapping column names to the
  98     data of the column (i.e. to a list). Only static columns (known at
  99     construction time) are contained in that dictionary. For data with numbered
 100     columns the column data is also available via the list columndata.
 101     Otherwise the columndata list should be missing and an access to a column
 102     number will fail.
 103
 104     The names of all columns (static and dynamic) must be fixed at the constructor
 105     and stated in the columnnames dictionary.
 106
 107     The instance variable title and defaultstyles contain the data title and
 108     the default styles (a list of styles), respectively.
 109     """
 110
 111     def dynamiccolumns(self, graph):
 112         """create and return dynamic columns data
 113
 114         Returns dynamic data matching the given axes (the axes range and other
 115         data might be used). The return value is a dictionary similar to the
 116         columns instance variable.
 117         """
 118         return {}
 119
 120
 121 class list(_data):
 122     "Graph data from a list of points"
 123
 124     defaultstyles = [style.symbol()]
 125
 126     def __init__(self, points, title="user provided list", addlinenumbers=1, **columns):
 127         if len(points):
 128             l = len(points[0])
 129             self.columndata = [[x] for x in points[0]]
 130             for point in points[1:]:
 131                 if l != len(point):
 132                     raise ValueError("different number of columns per point")
 133                 for i, x in enumerate(point):
 134                     self.columndata[i].append(x)
 135             for v in columns.values():
 136                 if abs(v) > l or (not addlinenumbers and abs(v) == l):
 137                     raise ValueError("column number bigger than number of columns")
 138             if addlinenumbers:
 139                 self.columndata = [range(1, len(points) + 1)] + self.columndata
 140             self.columns = dict([(key, self.columndata[i]) for key, i in columns.items()])
 141         else:
 142             self.columns = dict([(key, []) for key, i in columns])
 143         self.columnnames = self.columns.keys()
 144         self.title = title
 145         self.defaultstyles = [style.symbol()]
 146
 147
 148 class _notitle:
 149     pass
 150
 151 _columnintref = re.compile(r"\$(-?\d+)", re.IGNORECASE)
 152
 153 class data(_data):
 154     "creates a new data set out of an existing data set"
 155
 156     def __init__(self, data, title=_notitle, context={}, copy=1,
 157                        replacedollar=1, columncallback="__column__", **columns):
 158         # build a nice title
 159         if title is _notitle:
 160             items = columns.items()
 161             items.sort() # we want sorted items (otherwise they would be unpredictable scrambled)
 162             self.title = "%s: %s" % (text.escapestring(data.title or "unkown source"),
 163                                      ", ".join(["%s=%s" % (text.escapestring(key),
 164                                                            text.escapestring(str(value)))
 165                                                 for key, value in items]))
 166         else:
 167             self.title = title
 168
 169         self.orgdata = data
 170         self.defaultstyles = self.orgdata.defaultstyles
 171
 172         # analyse the **columns argument
 173         self.columns = {}
 174         for columnname, value in columns.items():
 175             # search in the columns dictionary
 176             try:
 177                 self.columns[columnname] = self.orgdata.columns[value]
 178             except KeyError:
 179                 # search in the columndata list
 180                 try:
 181                     self.columns[columnname] = self.orgdata.columndata[value]
 182                 except (AttributeError, TypeError):
 183                     # value was not an valid column identifier
 184                     # i.e. take it as a mathematical expression
 185                     if replacedollar:
 186                         m = _columnintref.search(value)
 187                         while m:
 188                             value = "%s%s(%s)%s" % (value[:m.start()], columncallback, m.groups()[0], value[m.end():])
 189                             m = _columnintref.search(value)
 190                         value = value.replace("$", columncallback)
 191                     expression = compile(value.strip(), __file__, "eval")
 192                     context = context.copy()
 193                     context[columncallback] = self.columncallback
 194                     if self.orgdata.columns:
 195                         key, columndata = self.orgdata.columns.items()[0]
 196                         count = len(columndata)
 197                     elif self.orgdata.columndata:
 198                         count = len(self.orgdata.columndata[0])
 199                     else:
 200                         count = 0
 201                     newdata = []
 202                     for i in xrange(count):
 203                         self.columncallbackcount = i
 204                         for key, values in self.orgdata.columns.items():
 205                             context[key] = values[i]
 206                         try:
 207                             newdata.append(eval(expression, _mathglobals, context))
 208                         except (ArithmeticError, ValueError):
 209                             newdata.append(None)
 210                     self.columns[columnname] = newdata
 211
 212         if copy:
 213             # copy other, non-conflicting column names
 214             for columnname, columndata in self.orgdata.columns.items():
 215                 if not self.columns.has_key(columnname):
 216                     self.columns[columnname] = columndata
 217
 218         self.columnnames = self.columns.keys()
 219
 220     def columncallback(self, value):
 221         try:
 222             return self.orgdata.columndata[value][self.columncallbackcount]
 223         except:
 224             return self.orgdata.columns[value][self.columncallbackcount]
 225
 226
 227 filecache = {}
 228
 229 class file(data):
 230
 231     defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
 232     defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
 233     defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")
 234
 235     def splitline(self, line, stringpattern, columnpattern, tofloat=1):
 236         """returns a tuple created out of the string line
 237         - matches stringpattern and columnpattern, adds the first group of that
 238           match to the result and and removes those matches until the line is empty
 239         - when stringpattern matched, the result is always kept as a string
 240         - when columnpattern matched and tofloat is true, a conversion to a float
 241           is tried; when this conversion fails, the string is kept"""
 242         result = []
 243         # try to gain speed by skip matching regular expressions
 244         if line.find('"')!=-1 or \
 245            stringpattern is not self.defaultstringpattern or \
 246            columnpattern is not self.defaultcolumnpattern:
 247             while len(line):
 248                 match = stringpattern.match(line)
 249                 if match:
 250                     result.append(match.groups()[0])
 251                     line = line[match.end():]
 252                 else:
 253                     match = columnpattern.match(line)
 254                     if tofloat:
 255                         try:
 256                             result.append(float(match.groups()[0]))
 257                         except (TypeError, ValueError):
 258                             result.append(match.groups()[0])
 259                     else:
 260                         result.append(match.groups()[0])
 261                     line = line[match.end():]
 262         else:
 263             if tofloat:
 264                 try:
 265                     return map(float, line.split())
 266                 except (TypeError, ValueError):
 267                     result = []
 268                     for r in line.split():
 269                         try:
 270                             result.append(float(r))
 271                         except (TypeError, ValueError):
 272                             result.append(r)
 273             else:
 274                 return line.split()
 275         return result
 276
 277     def getcachekey(self, *args):
 278         return ":".join([str(x) for x in args])
 279
 280     def __init__(self, filename,
 281                        commentpattern=defaultcommentpattern,
 282                        stringpattern=defaultstringpattern,
 283                        columnpattern=defaultcolumnpattern,
 284                        skiphead=0, skiptail=0, every=1,
 285                        **kwargs):
 286
 287         def readfile(file, title, self=self, commentpattern=commentpattern, stringpattern=stringpattern, columnpattern=columnpattern, skiphead=skiphead, skiptail=skiptail, every=every):
 288             columns = []
 289             columndata = []
 290             linenumber = 0
 291             maxcolumns = 0
 292             for line in file.readlines():
 293                 line = line.strip()
 294                 match = commentpattern.match(line)
 295                 if match:
 296                     if not len(columndata):
 297                         columns = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
 298                 else:
 299                     linedata = []
 300                     for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
 301                         linedata.append(value)
 302                     if len(linedata):
 303                         if linenumber >= skiphead and not ((linenumber - skiphead) % every):
 304                             linedata = [linenumber + 1] + linedata
 305                             if len(linedata) > maxcolumns:
 306                                 maxcolumns = len(linedata)
 307                             columndata.append(linedata)
 308                         linenumber += 1
 309             if skiptail >= every:
 310                 skip, x = divmod(skiptail, every)
 311                 del columndata[-skip:]
 312             for i in xrange(len(columndata)):
 313                 if len(columndata[i]) != maxcolumns:
 314                     columndata[i].extend([None]*(maxcolumns-len(columndata[i])))
 315             return list(columndata, title=title, addlinenumbers=0,
 316                         **dict([(column, i+1) for i, column in enumerate(columns[:maxcolumns-1])]))
 317
 318         try:
 319             filename.readlines
 320         except:
 321             # not a file-like object -> open it
 322             cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
 323             if not filecache.has_key(cachekey):
 324                 filecache[cachekey] = readfile(open(filename), filename)
 325             data.__init__(self, filecache[cachekey], **kwargs)
 326         else:
 327             data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
 328
 329
 330 conffilecache = {}
 331
 332 class conffile(data):
 333
 334     def __init__(self, filename, **kwargs):
 335         """read data from a config-like file
 336         - filename is a string
 337         - each row is defined by a section in the config-like file (see
 338           config module description)
 339         - the columns for each row are defined by lines in the section file;
 340           the option entries identify and name the columns
 341         - further keyword arguments are passed to the constructor of data,
 342           keyword arguments data and titles excluded"""
 343
 344         def readfile(file, title):
 345             config = ConfigParser.ConfigParser()
 346             config.optionxform = str
 347             config.readfp(file)
 348             sections = config.sections()
 349             sections.sort()
 350             columndata = [None]*len(sections)
 351             maxcolumns = 1
 352             columns = {}
 353             for i in xrange(len(sections)):
 354                 point = [sections[i]] + [None]*(maxcolumns-1)
 355                 for option in config.options(sections[i]):
 356                     value = config.get(sections[i], option)
 357                     try:
 358                         value = float(value)
 359                     except:
 360                         pass
 361                     try:
 362                         index = columns[option]
 363                     except KeyError:
 364                         columns[option] = maxcolumns
 365                         point.append(value)
 366                         maxcolumns += 1
 367                     else:
 368                         point[index] = value
 369                 columndata[i] = point
 370             # wrap result into a data instance to remove column numbers
 371             result = data(list(columndata, addlinenumbers=0, **columns), title=title)
 372             # ... but reinsert sections as linenumbers
 373             result.columndata = [[x[0] for x in columndata]]
 374             return result
 375
 376         try:
 377             filename.readlines
 378         except:
 379             # not a file-like object -> open it
 380             if not filecache.has_key(filename):
 381                 filecache[filename] = readfile(open(filename), filename)
 382             data.__init__(self, filecache[filename], **kwargs)
 383         else:
 384             data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
 385
 386
 387 cbdfilecache = {}
 388
 389 class cbdfile(data):
 390
 391     def getcachekey(self, *args):
 392         return ":".join([str(x) for x in args])
 393
 394     def __init__(self, filename, minrank=None, maxrank=None, **kwargs):
 395
 396         class cbdhead:
 397
 398             def __init__(self, file):
 399                 (self.magic,
 400                  self.dictaddr,
 401                  self.segcount,
 402                  self.segsize,
 403                  self.segmax,
 404                  self.fill) = struct.unpack("<5i20s", file.read(40))
 405                 if self.magic != 0x20770002:
 406                     raise ValueError("bad magic number")
 407
 408         class segdict:
 409
 410             def __init__(self, file, i):
 411                 self.index = i
 412                 (self.segid,
 413                  self.maxlat,
 414                  self.minlat,
 415                  self.maxlong,
 416                  self.minlong,
 417                  self.absaddr,
 418                  self.nbytes,
 419                  self.rank) = struct.unpack("<6i2h", file.read(28))
 420
 421         class segment:
 422
 423             def __init__(self, file, sd):
 424                 file.seek(sd.absaddr)
 425                 (self.orgx,
 426                  self.orgy,
 427                  self.id,
 428                  self.nstrokes,
 429                  self.dummy) = struct.unpack("<3i2h", file.read(16))
 430                 oln, olt = self.orgx, self.orgy
 431                 self.points = [(olt, oln)]
 432                 for i in range(self.nstrokes):
 433                     c1, c2 = struct.unpack("2c", file.read(2))
 434                     if ord(c2) & 0x40:
 435                         if c1 > "\177":
 436                             dy = ord(c1) - 256
 437                         else:
 438                             dy = ord(c1)
 439                         if c2 > "\177":
 440                             dx = ord(c2) - 256
 441                         else:
 442                             dx = ord(c2) - 64
 443                     else:
 444                         c3, c4, c5, c6, c7, c8 = struct.unpack("6c", file.read(6))
 445                         if c2 > "\177":
 446                             c2 = chr(ord(c2) | 0x40)
 447                         dx, dy = struct.unpack("<2i", c3+c4+c1+c2+c7+c8+c5+c6)
 448                     oln += dx
 449                     olt += dy
 450                     self.points.append((olt, oln))
 451                 sd.nstrokes = self.nstrokes
 452
 453         def readfile(file, title):
 454             h = cbdhead(file)
 455             file.seek(h.dictaddr)
 456             sds = [segdict(file, i+1) for i in range(h.segcount)]
 457             sbs = [segment(file, sd) for sd in sds]
 458
 459             # remove jumps at long +/- 180
 460             for sd, sb in zip(sds, sbs):
 461                 if sd.minlong < -150*3600 and sd.maxlong > 150*3600:
 462                     for i, (lat, long) in enumerate(sb.points):
 463                          if long < 0:
 464                              sb.points[i] = lat, long + 360*3600
 465
 466             columndata = []
 467             for sd, sb in zip(sds, sbs):
 468                 if ((minrank is None or sd.rank >= minrank) and
 469                     (maxrank is None or sd.rank <= maxrank)):
 470                     if columndata:
 471                         columndata.append((None, None))
 472                     columndata.extend([(long/3600.0, lat/3600.0)
 473                                        for lat, long in sb.points])
 474
 475             result = list(columndata, title=title)
 476             result.defaultstyles = [style.line()]
 477             return result
 478
 479
 480         try:
 481             filename.readlines
 482         except:
 483             # not a file-like object -> open it
 484             cachekey = self.getcachekey(filename, minrank, maxrank)
 485             if not cbdfilecache.has_key(cachekey):
 486                 cbdfilecache[cachekey] = readfile(open(filename, "rb"), filename)
 487             data.__init__(self, cbdfilecache[cachekey], **kwargs)
 488         else:
 489             data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
 490
 491
 492 class function(_data):
 493
 494     defaultstyles = [style.line()]
 495
 496     assignmentpattern = re.compile(r"\s*([a-z_][a-z0-9_]*)\s*\(\s*([a-z_][a-z0-9_]*)\s*\)\s*=", re.IGNORECASE)
 497
 498     def __init__(self, expression, title=_notitle, min=None, max=None,
 499                  points=100, context={}):
 500
 501         if title is _notitle:
 502             self.title = expression
 503         else:
 504             self.title = title
 505         self.min = min
 506         self.max = max
 507         self.numberofpoints = points
 508         self.context = context.copy() # be save on late evaluations
 509         m = self.assignmentpattern.match(expression)
 510         if m:
 511             self.yname, self.xname = m.groups()
 512             expression = expression[m.end():]
 513         else:
 514             raise ValueError("y(x)=... or similar expected")
 515         if context.has_key(self.xname):
 516             raise ValueError("xname in context")
 517         self.expression = compile(expression.strip(), __file__, "eval")
 518         self.columns = {}
 519         self.columnnames = [self.xname, self.yname]
 520
 521     def dynamiccolumns(self, graph):
 522         dynamiccolumns = {self.xname: [], self.yname: []}
 523
 524         xaxis = graph.axes[self.xname]
 525         from pyx.graph.axis import logarithmic
 526         logaxis = isinstance(xaxis.axis, logarithmic)
 527         if self.min is not None:
 528             min = self.min
 529         else:
 530             min = xaxis.data.min
 531         if self.max is not None:
 532             max = self.max
 533         else:
 534             max = xaxis.data.max
 535         if logaxis:
 536             min = math.log(min)
 537             max = math.log(max)
 538         for i in range(self.numberofpoints):
 539             x = min + (max-min)*i / (self.numberofpoints-1.0)
 540             if logaxis:
 541                 x = math.exp(x)
 542             dynamiccolumns[self.xname].append(x)
 543             self.context[self.xname] = x
 544             try:
 545                 y = eval(self.expression, _mathglobals, self.context)
 546             except (ArithmeticError, ValueError):
 547                 y = None
 548             dynamiccolumns[self.yname].append(y)
 549         return dynamiccolumns
 550
 551
 552 class functionxy(function):
 553
 554     def __init__(self, f, min=None, max=None, **kwargs):
 555         function.__init__(self, "y(x)=f(x)", context={"f": f}, min=min, max=max, **kwargs)
 556
 557
 558 class paramfunction(_data):
 559
 560     defaultstyles = [style.line()]
 561
 562     def __init__(self, varname, min, max, expression, title=_notitle, points=100, context={}):
 563         if context.has_key(varname):
 564             raise ValueError("varname in context")
 565         if title is _notitle:
 566             self.title = expression
 567         else:
 568             self.title = title
 569         varlist, expression = expression.split("=")
 570         expression = compile(expression.strip(), __file__, "eval")
 571         keys = [key.strip() for key in varlist.split(",")]
 572         self.columns = dict([(key, []) for key in keys])
 573         context = context.copy()
 574         for i in range(points):
 575             param = min + (max-min)*i / (points-1.0)
 576             context[varname] = param
 577             values = eval(expression, _mathglobals, context)
 578             for key, value in zip(keys, values):
 579                 self.columns[key].append(value)
 580         if len(keys) != len(values):
 581             raise ValueError("unpack tuple of wrong size")
 582         self.columnnames = self.columns.keys()
 583
 584
 585 class paramfunctionxy(paramfunction):
 586
 587     def __init__(self, f, min, max, **kwargs):
 588         paramfunction.__init__(self, "t", min, max, "x, y = f(t)", context={"f": f}, **kwargs)