pyx/graph/data.py

   1 #!/usr/bin/env python
   2 # -*- coding: ISO-8859-1 -*-
   3 #
   4 #
   5 # Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
   6 # Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
   7 # Copyright (C) 2002-2004 André Wobst <wobsta@users.sourceforge.net>
   8 #
   9 # This file is part of PyX (http://pyx.sourceforge.net/).
  10 #
  11 # PyX is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15 #
  16 # PyX is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20 #
  21 # You should have received a copy of the GNU General Public License
  22 # along with PyX; if not, write to the Free Software
  23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24
  25
  26 import math, re, ConfigParser, struct, warnings
  27 from pyx import mathtree, text
  28 from pyx.style import linestyle
  29 from pyx.graph import style
  30
  31 try:
  32     enumerate([])
  33 except NameError:
  34     # fallback implementation for Python 2.2 and below
  35     def enumerate(list):
  36         return zip(xrange(len(list)), list)
  37
  38 try:
  39     dict()
  40 except NameError:
  41     # fallback implementation for Python 2.1
  42     def dict(items):
  43         result = {}
  44         for key, value in items:
  45             result[key] = value
  46         return result
  47
  48
  49 class _data:
  50     """graph data interface
  51
  52     Graph data consists in columns, where each column might be identified by a
  53     string or an integer. Each row in the resulting table refers to a data
  54     point.
  55
  56     All methods except for the constructor should consider self and its
  57     attributes to be readonly, since the data instance might be shared between
  58     several graphs simultaniously.
  59
  60     The instance variable columns is a dictionary mapping column names to the
  61     data of the column (i.e. to a list). Only static columns (known at
  62     construction time) are contained in that dictionary. For data with numbered
  63     columns the column data is also available via the list columndata.
  64     Otherwise the columndata list should be missing and an access to a column
  65     number will fail.
  66
  67     The instance variable title and defaultstyles contain the data title and
  68     the default styles (a list of styles), respectively.
  69     """
  70
  71     def columnnames(self, graph):
  72         """return a list of column names
  73
  74         Currently the column names might depend on the axes names. This dynamic
  75         nature is subject of removal for the future. Then the method could be
  76         replaced by an instance variable already initialized in the contructor.
  77
  78         The result will be self.columns.keys() + self.dynamiccolums.keys(), but
  79         the later can only be called after the static axes ranges have been
  80         fixed. OTOH the column names are already needed in the initialization
  81         process of the styles sharedata and privatedata.
  82         """
  83         return self.columns.keys()
  84
  85     def dynamiccolumns(self, graph):
  86         """create and return dynamic columns data
  87
  88         Returns dynamic data matching the given axes (the axes range and other
  89         data might be used). The return value is a dictionary similar to the
  90         columns instance variable.
  91         """
  92         return {}
  93
  94
  95 class list(_data):
  96     "Graph data from a list of points"
  97
  98     defaultstyles = [style.symbol()]
  99
 100     def __init__(self, points, title="user provided list", addlinenumbers=1, **columns):
 101         if len(points):
 102             l = len(points[0])
 103             self.columndata = [[x] for x in points[0]]
 104             for point in points[1:]:
 105                 if l != len(point):
 106                     raise ValueError("different number of columns per point")
 107                 for i, x in enumerate(point):
 108                     self.columndata[i].append(x)
 109             for v in columns.values():
 110                 if abs(v) > l or (not addlinenumbers and abs(v) == l):
 111                     raise ValueError("column number bigger than number of columns")
 112             if addlinenumbers:
 113                 self.columndata = [range(1, len(points) + 1)] + self.columndata
 114             self.columns = dict([(key, self.columndata[i]) for key, i in columns.items()])
 115         else:
 116             self.columns = dict([(key, []) for key, i in columns])
 117         self.title = title
 118         self.defaultstyles = [style.symbol()]
 119
 120
 121
 122 ##############################################################
 123 # math tree enhanced by column number variables
 124 ##############################################################
 125
 126 class MathTreeFuncCol(mathtree.MathTreeFunc1):
 127
 128     def __init__(self, *args):
 129         mathtree.MathTreeFunc1.__init__(self, "_column_", *args)
 130
 131     def VarList(self):
 132         # we misuse VarList here:
 133         # - instead of returning a string, we return this instance itself
 134         # - before calculating the expression, you must call ColumnNameAndNumber
 135         #   once (when limiting the context to external defined variables,
 136         #   otherwise you have to call it each time)
 137         return [self]
 138
 139     def ColumnNameAndNumber(_hidden_self, **args):
 140         number = int(_hidden_self.Args[0].Calc(**args))
 141         _hidden_self.varname = "_column_%i" % number
 142         return _hidden_self.varname, number
 143
 144     def __str__(self):
 145         return self.varname
 146
 147     def Calc(_hidden_self, **args):
 148         return args[_hidden_self.varname]
 149
 150 MathTreeFuncsWithCol = mathtree.DefaultMathTreeFuncs + [MathTreeFuncCol]
 151
 152
 153 class columntree:
 154
 155     def __init__(self, tree):
 156         self.tree = tree
 157         self.Calc = tree.Calc
 158         self.__str__ = tree.__str__
 159
 160     def VarList(self):
 161         # returns a list of regular variables (strings) like the original mathtree
 162         return [var for var in self.tree.VarList() if not isinstance(var, MathTreeFuncCol) and var[:8] != "_column_"]
 163
 164     def columndict(_hidden_self, **context):
 165         # returns a dictionary of column names (keys) and column numbers (values)
 166         columndict = {}
 167         for var in _hidden_self.tree.VarList():
 168             if isinstance(var, MathTreeFuncCol):
 169                 name, number = var.ColumnNameAndNumber(**context)
 170                 columndict[name] = number
 171             elif var[:8] == "_column_":
 172                 columndict[var] = int(var[8:])
 173         return columndict
 174
 175
 176 class dataparser(mathtree.parser):
 177     # mathtree parser enhanced by column handling
 178     # parse returns a columntree instead of a regular tree
 179
 180     def __init__(self, MathTreeFuncs=MathTreeFuncsWithCol, **kwargs):
 181         mathtree.parser.__init__(self, MathTreeFuncs=MathTreeFuncs, **kwargs)
 182
 183     def parse(self, expr):
 184         return columntree(mathtree.parser.parse(self, expr.replace("$", "_column_")))
 185
 186 ##############################################################
 187
 188
 189 class _notitle:
 190     pass
 191
 192 class data(_data):
 193     "creates a new data set out of an existing data set"
 194
 195     def __init__(self, data, title=_notitle, parser=dataparser(), context={}, copy=1, **columns):
 196         # build a nice title
 197         if title is _notitle:
 198             items = columns.items()
 199             items.sort() # we want sorted items (otherwise they would be unpredictable scrambled)
 200             self.title = "%s: %s" % (data.title,
 201                                      ", ".join(["%s=%s" % (text.escapestring(key),
 202                                                            text.escapestring(value))
 203                                                 for key, value in items]))
 204         else:
 205             self.title = title
 206
 207         self.orgdata = data
 208         self.defaultstyles = self.orgdata.defaultstyles
 209
 210         # analyse the **columns argument
 211         self.columns = {}
 212         for columnname, value in columns.items():
 213             try:
 214                 self.columns[columnname] = self.orgdata.columns[value]
 215             except:
 216                 pass
 217             try:
 218                 self.columns[columnname] = self.orgdata.columndata[value]
 219             except:
 220                 pass
 221             # value was not an valid column identifier
 222             if not self.columns.has_key(columnname):
 223                 # take it as a mathematical expression
 224                 tree = parser.parse(value)
 225                 columndict = tree.columndict(**context)
 226                 vars = {}
 227                 for var, columnnumber in columndict.items():
 228                     # column data accessed via $<column number>
 229                     vars[var] = self.orgdata.columndata[columnnumber]
 230                 for var in tree.VarList():
 231                     try:
 232                         # column data accessed via the name of the column
 233                         vars[var] = self.orgdata.columns[var]
 234                     except (KeyError, ValueError):
 235                         # other data available in context
 236                         if var not in context.keys():
 237                             raise ValueError("undefined variable '%s'" % var)
 238                 newdata = []
 239                 usevars = context.copy() # do not modify context, use a copy vars instead
 240                 if self.orgdata.columns:
 241                     key, columndata = self.orgdata.columns.items()[0]
 242                     count = len(columndata)
 243                 elif self.orgdata.columndata:
 244                     count = len(self.orgdata.columndata[0])
 245                 else:
 246                     count = 0
 247                 for i in xrange(count):
 248                     # insert column data as prepared in vars
 249                     for var, columndata in vars.items():
 250                         usevars[var] = columndata[i]
 251                     # evaluate expression
 252                     try:
 253                         newdata.append(tree.Calc(**usevars))
 254                     except (ArithmeticError, ValueError):
 255                         newdata.append(None)
 256                     # we could also do:
 257                     # point[newcolumnnumber] = eval(str(tree), vars)
 258
 259                     # XXX: It might happen, that the evaluation of the expression
 260                     #      seems to work, but the result is NaN/Inf/-Inf. This
 261                     #      is highly plattform dependend.
 262
 263                 self.columns[columnname] = newdata
 264
 265         if copy:
 266             # copy other, non-conflicting column names
 267             for columnname, columndata in self.orgdata.columns.items():
 268                 if not self.columns.has_key(columnname):
 269                     self.columns[columnname] = columndata
 270
 271     def getcolumnpointsindex(self, column):
 272         return self.columns[column]
 273
 274
 275 filecache = {}
 276
 277 class file(data):
 278
 279     defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
 280     defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
 281     defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")
 282
 283     def splitline(self, line, stringpattern, columnpattern, tofloat=1):
 284         """returns a tuple created out of the string line
 285         - matches stringpattern and columnpattern, adds the first group of that
 286           match to the result and and removes those matches until the line is empty
 287         - when stringpattern matched, the result is always kept as a string
 288         - when columnpattern matched and tofloat is true, a conversion to a float
 289           is tried; when this conversion fails, the string is kept"""
 290         result = []
 291         # try to gain speed by skip matching regular expressions
 292         if line.find('"')!=-1 or \
 293            stringpattern is not self.defaultstringpattern or \
 294            columnpattern is not self.defaultcolumnpattern:
 295             while len(line):
 296                 match = stringpattern.match(line)
 297                 if match:
 298                     result.append(match.groups()[0])
 299                     line = line[match.end():]
 300                 else:
 301                     match = columnpattern.match(line)
 302                     if tofloat:
 303                         try:
 304                             result.append(float(match.groups()[0]))
 305                         except (TypeError, ValueError):
 306                             result.append(match.groups()[0])
 307                     else:
 308                         result.append(match.groups()[0])
 309                     line = line[match.end():]
 310         else:
 311             if tofloat:
 312                 try:
 313                     return map(float, line.split())
 314                 except (TypeError, ValueError):
 315                     result = []
 316                     for r in line.split():
 317                         try:
 318                             result.append(float(r))
 319                         except (TypeError, ValueError):
 320                             result.append(r)
 321             else:
 322                 return line.split()
 323         return result
 324
 325     def getcachekey(self, *args):
 326         return ":".join([str(x) for x in args])
 327
 328     def __init__(self, filename,
 329                        commentpattern=defaultcommentpattern,
 330                        stringpattern=defaultstringpattern,
 331                        columnpattern=defaultcolumnpattern,
 332                        skiphead=0, skiptail=0, every=1,
 333                        **kwargs):
 334
 335         def readfile(file, title, self=self, commentpattern=commentpattern, stringpattern=stringpattern, columnpattern=columnpattern, skiphead=skiphead, skiptail=skiptail, every=every):
 336             columns = []
 337             columndata = []
 338             linenumber = 0
 339             maxcolumns = 0
 340             for line in file.readlines():
 341                 line = line.strip()
 342                 match = commentpattern.match(line)
 343                 if match:
 344                     if not len(columndata):
 345                         columns = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
 346                 else:
 347                     linedata = []
 348                     for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
 349                         linedata.append(value)
 350                     if len(linedata):
 351                         if linenumber >= skiphead and not ((linenumber - skiphead) % every):
 352                             linedata = [linenumber + 1] + linedata
 353                             if len(linedata) > maxcolumns:
 354                                 maxcolumns = len(linedata)
 355                             columndata.append(linedata)
 356                         linenumber += 1
 357             if skiptail >= every:
 358                 skip, x = divmod(skiptail, every)
 359                 del columndata[-skip:]
 360             for i in xrange(len(columndata)):
 361                 if len(columndata[i]) != maxcolumns:
 362                     columndata[i].extend([None]*(maxcolumns-len(columndata[i])))
 363             return list(columndata, title=title, addlinenumbers=0,
 364                         **dict([(column, i+1) for i, column in enumerate(columns[:maxcolumns-1])]))
 365
 366         try:
 367             filename.readlines
 368         except:
 369             # not a file-like object -> open it
 370             cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
 371             if not filecache.has_key(cachekey):
 372                 filecache[cachekey] = readfile(open(filename), filename)
 373             data.__init__(self, filecache[cachekey], **kwargs)
 374         else:
 375             data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
 376
 377
 378 conffilecache = {}
 379
 380 class conffile(data):
 381
 382     def __init__(self, filename, **kwargs):
 383         """read data from a config-like file
 384         - filename is a string
 385         - each row is defined by a section in the config-like file (see
 386           config module description)
 387         - the columns for each row are defined by lines in the section file;
 388           the option entries identify and name the columns
 389         - further keyword arguments are passed to the constructor of data,
 390           keyword arguments data and titles excluded"""
 391
 392         def readfile(file, title):
 393             config = ConfigParser.ConfigParser()
 394             config.optionxform = str
 395             config.readfp(file)
 396             sections = config.sections()
 397             sections.sort()
 398             columndata = [None]*len(sections)
 399             maxcolumns = 1
 400             columns = {}
 401             for i in xrange(len(sections)):
 402                 point = [sections[i]] + [None]*(maxcolumns-1)
 403                 for option in config.options(sections[i]):
 404                     value = config.get(sections[i], option)
 405                     try:
 406                         value = float(value)
 407                     except:
 408                         pass
 409                     try:
 410                         index = columns[option]
 411                     except KeyError:
 412                         columns[option] = maxcolumns
 413                         point.append(value)
 414                         maxcolumns += 1
 415                     else:
 416                         point[index] = value
 417                 columndata[i] = point
 418             # wrap result into a data instance to remove column numbers
 419             result = data(list(columndata, addlinenumbers=0, **columns), title=title)
 420             # ... but reinsert sections as linenumbers
 421             result.columndata = [[x[0] for x in columndata]]
 422             return result
 423
 424         try:
 425             filename.readlines
 426         except:
 427             # not a file-like object -> open it
 428             if not filecache.has_key(filename):
 429                 filecache[filename] = readfile(open(filename), filename)
 430             data.__init__(self, filecache[filename], **kwargs)
 431         else:
 432             data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
 433
 434
 435 cbdfilecache = {}
 436
 437 class cbdfile(data):
 438
 439     def getcachekey(self, *args):
 440         return ":".join([str(x) for x in args])
 441
 442     def __init__(self, filename, minrank=None, maxrank=None, **kwargs):
 443
 444         class cbdhead:
 445
 446             def __init__(self, file):
 447                 (self.magic,
 448                  self.dictaddr,
 449                  self.segcount,
 450                  self.segsize,
 451                  self.segmax,
 452                  self.fill) = struct.unpack("<5i20s", file.read(40))
 453                 if self.magic != 0x20770002:
 454                     raise ValueError("bad magic number")
 455
 456         class segdict:
 457
 458             def __init__(self, file, i):
 459                 self.index = i
 460                 (self.segid,
 461                  self.maxlat,
 462                  self.minlat,
 463                  self.maxlong,
 464                  self.minlong,
 465                  self.absaddr,
 466                  self.nbytes,
 467                  self.rank) = struct.unpack("<6i2h", file.read(28))
 468
 469         class segment:
 470
 471             def __init__(self, file, sd):
 472                 file.seek(sd.absaddr)
 473                 (self.orgx,
 474                  self.orgy,
 475                  self.id,
 476                  self.nstrokes,
 477                  self.dummy) = struct.unpack("<3i2h", file.read(16))
 478                 oln, olt = self.orgx, self.orgy
 479                 self.points = [(olt, oln)]
 480                 for i in range(self.nstrokes):
 481                     c1, c2 = struct.unpack("2c", file.read(2))
 482                     if ord(c2) & 0x40:
 483                         if c1 > "\177":
 484                             dy = ord(c1) - 256
 485                         else:
 486                             dy = ord(c1)
 487                         if c2 > "\177":
 488                             dx = ord(c2) - 256
 489                         else:
 490                             dx = ord(c2) - 64
 491                     else:
 492                         c3, c4, c5, c6, c7, c8 = struct.unpack("6c", file.read(6))
 493                         if c2 > "\177":
 494                             c2 = chr(ord(c2) | 0x40)
 495                         dx, dy = struct.unpack("<2i", c3+c4+c1+c2+c7+c8+c5+c6)
 496                     oln += dx
 497                     olt += dy
 498                     self.points.append((olt, oln))
 499                 sd.nstrokes = self.nstrokes
 500
 501         def readfile(file, title):
 502             h = cbdhead(file)
 503             file.seek(h.dictaddr)
 504             sds = [segdict(file, i+1) for i in range(h.segcount)]
 505             sbs = [segment(file, sd) for sd in sds]
 506
 507             # remove jumps at long +/- 180
 508             for sd, sb in zip(sds, sbs):
 509                 if sd.minlong < -150*3600 and sd.maxlong > 150*3600:
 510                     for i, (lat, long) in enumerate(sb.points):
 511                          if long < 0:
 512                              sb.points[i] = lat, long + 360*3600
 513
 514             columndata = []
 515             for sd, sb in zip(sds, sbs):
 516                 if ((minrank is None or sd.rank >= minrank) and
 517                     (maxrank is None or sd.rank <= maxrank)):
 518                     if columndata:
 519                         columndata.append((None, None))
 520                     columndata.extend([(long/3600.0, lat/3600.0)
 521                                        for lat, long in sb.points])
 522
 523             result = list(columndata, title=title)
 524             result.defaultstyles = [style.line()]
 525             return result
 526
 527
 528         try:
 529             filename.readlines
 530         except:
 531             # not a file-like object -> open it
 532             cachekey = self.getcachekey(filename, minrank, maxrank)
 533             if not cbdfilecache.has_key(cachekey):
 534                 cbdfilecache[cachekey] = readfile(open(filename, "rb"), filename)
 535             data.__init__(self, cbdfilecache[cachekey], **kwargs)
 536         else:
 537             data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
 538
 539
 540
 541 class function(_data):
 542
 543     defaultstyles = [style.line()]
 544
 545     assignmentpattern = re.compile(r"\s*([a-z_][a-z0-9_]*)\s*\(\s*([a-z_][a-z0-9_]*)\s*\)\s*=", re.IGNORECASE)
 546
 547     def __init__(self, expression, title=_notitle, min=None, max=None,
 548                  points=100, parser=mathtree.parser(), context={}):
 549
 550         if title is _notitle:
 551             self.title = expression
 552         else:
 553             self.title = title
 554         self.min = min
 555         self.max = max
 556         self.numberofpoints = points
 557         self.context = context.copy() # be save on late evaluations
 558         m = self.assignmentpattern.match(expression)
 559         if m:
 560             self.yname, self.xname = m.groups()
 561             expression = expression[m.end():]
 562         else:
 563             warnings.warn("implicit variables are deprecated, use y(x)=... and the like", DeprecationWarning)
 564             self.xname = None
 565             self.yname, expression = [x.strip() for x in expression.split("=")]
 566         self.mathtree = parser.parse(expression)
 567         self.columns = {}
 568
 569     def columnnames(self, graph):
 570         if self.xname is None:
 571             for xname in self.mathtree.VarList():
 572                 if xname in graph.axes.keys():
 573                     if self.xname is None:
 574                         self.xname = xname
 575                     else:
 576                         raise ValueError("multiple variables found")
 577             if self.xname is None:
 578                 raise ValueError("no variable found")
 579         return [self.xname, self.yname]
 580
 581     def dynamiccolumns(self, graph):
 582         dynamiccolumns = {self.xname: [], self.yname: []}
 583
 584         xaxis = graph.axes[self.xname]
 585         from pyx.graph.axis import logarithmic
 586         logaxis = isinstance(xaxis.axis, logarithmic)
 587         if self.min is not None:
 588             min = self.min
 589         else:
 590             min = xaxis.data.min
 591         if self.max is not None:
 592             max = self.max
 593         else:
 594             max = xaxis.data.max
 595         if logaxis:
 596             min = math.log(min)
 597             max = math.log(max)
 598         for i in range(self.numberofpoints):
 599             x = min + (max-min)*i / (self.numberofpoints-1.0)
 600             if logaxis:
 601                 x = math.exp(x)
 602             dynamiccolumns[self.xname].append(x)
 603             self.context[self.xname] = x
 604             try:
 605                 y = self.mathtree.Calc(**self.context)
 606             except (ArithmeticError, ValueError):
 607                 y = None
 608             dynamiccolumns[self.yname].append(y)
 609         return dynamiccolumns
 610
 611
 612 class paramfunction(_data):
 613
 614     defaultstyles = [style.line()]
 615
 616     def __init__(self, varname, min, max, expression, title=_notitle, points=100, parser=mathtree.parser(), context={}):
 617         if title is _notitle:
 618             self.title = expression
 619         else:
 620             self.title = title
 621         varlist, expressionlist = expression.split("=")
 622         keys = [key.strip() for key in varlist.split(",")]
 623         mathtrees = parser.parse(expressionlist)
 624         if len(keys) != len(mathtrees):
 625             raise ValueError("unpack tuple of wrong size")
 626         self.columns = dict([(key, []) for key in keys])
 627         context = context.copy()
 628         for i in range(points):
 629             param = min + (max-min)*i / (points-1.0)
 630             context[varname] = param
 631             for key, mathtree in zip(keys, mathtrees):
 632                 try:
 633                     self.columns[key].append(mathtree.Calc(**context))
 634                 except (ArithmeticError, ValueError):
 635                     self.columns[key].append(None)