pyx/graph/data.py

   1 #!/usr/bin/env python
   2 # -*- coding: ISO-8859-1 -*-
   3 #
   4 #
   5 # Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
   6 # Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
   7 # Copyright (C) 2002-2004 André Wobst <wobsta@users.sourceforge.net>
   8 #
   9 # This file is part of PyX (http://pyx.sourceforge.net/).
  10 #
  11 # PyX is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15 #
  16 # PyX is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20 #
  21 # You should have received a copy of the GNU General Public License
  22 # along with PyX; if not, write to the Free Software
  23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24
  25
  26 import re, ConfigParser
  27 from pyx import mathtree
  28 from pyx.graph import style
  29
  30
  31 class _Idata:
  32     """interface definition of a data object
  33     data objects store data arranged in rows and columns"""
  34
  35     columns = {}
  36     """a dictionary mapping column titles to column numbers"""
  37
  38     points = []
  39     """column/row data
  40     - a list of rows where each row represents a data point
  41     - each row contains a list, where each entry of the list represents a value for a column
  42     - the number of columns for each data point must match the number of columns
  43     - any column enty of any data point might be a float, a string, or None"""
  44
  45     title = ""
  46     """a string (for printing in PyX, e.g. in a graph key)
  47     - None is allowed, which marks the data instance to have no title,
  48       e.g. it should be skiped in a graph key etc.
  49     - the title does need to be unique"""
  50
  51     def getcolumnnumber(self, column):
  52         """returns a column number
  53         - the column parameter might be an integer to be used as a column number
  54         - a column number must be a valid list index (negative values are allowed)
  55         - the column parameter might be a string contained in the columns list;
  56           to be valid, the string must be unique within the columns list"""
  57
  58     def getcolumn(self, column):
  59         """returns a column
  60         - extracts a column out of self.data and returns it as a list
  61         - the column is identified by the parameter column as in getcolumnnumber"""
  62
  63
  64 class _data:
  65
  66     defaultstyle = style.symbol()
  67
  68     def getcolumnnumber(self, key):
  69         try:
  70             key + ""
  71         except:
  72             return key + 0
  73         else:
  74             return self.columns[key.strip()]
  75
  76     def getcolumn(self, key):
  77         columnno = self.getcolumnnumber(key)
  78         return [point[columnno] for point in self.points]
  79
  80     def setstyle(self, graph, style):
  81         self.style = style
  82         unhandledcolumns = self.style.setdata(graph, self.columns, self)
  83         unhandledcolumnkeys = unhandledcolumns.keys()
  84         if len(unhandledcolumnkeys):
  85             raise ValueError("style couldn't handle column keys %s" % unhandledcolumnkeys)
  86
  87     def selectstyle(self, graph, selectindex, selecttotal):
  88         self.style.selectstyle(selectindex, selecttotal, self)
  89
  90     def adjustaxes(self, graph, step):
  91         """
  92         - on step == 0 axes with fixed data should be adjusted
  93         - on step == 1 the current axes ranges might be used to
  94           calculate further data (e.g. y data for a function y=f(x)
  95           where the y range depends on the x range)
  96         - on step == 2 axes ranges not previously set should be
  97           updated by data accumulated by step 1"""
  98         if step == 0:
  99             self.style.adjustaxes(self.columns.values(), self)
 100
 101     def draw(self, graph):
 102         self.style.drawpoints(graph, self)
 103
 104
 105 class list(_data):
 106     "creates data out of points"
 107
 108     def __init__(self, points, title="unknown", maxcolumns=None, addlinenumbers=1, **columns):
 109         if maxcolumns is None and len(points):
 110             maxcolumns = max([len(point) for point in points])
 111         if addlinenumbers:
 112             for i in xrange(len(points)):
 113                 points[i] = [i+1] + points[i] + [None] * (maxcolumns - len(points[i]))
 114         else:
 115             for i in xrange(len(points)):
 116                 points[i] = points[i] + [None] * (maxcolumns - len(points[i]))
 117         self.points = points
 118         self.columns = columns
 119         self.title = title
 120
 121
 122 ##############################################################
 123 # math tree enhanced by column handling
 124 ##############################################################
 125
 126 class MathTreeFuncCol(mathtree.MathTreeFunc1):
 127
 128     def __init__(self, *args):
 129         mathtree.MathTreeFunc1.__init__(self, "_column_", *args)
 130
 131     def VarList(self):
 132         # we misuse VarList here:
 133         # - instead of returning a string, we return this instance itself
 134         # - before calculating the expression, you must call ColumnNameAndNumber
 135         #   once (when limiting the context to external defined variables,
 136         #   otherwise you have to call it each time)
 137         return [self]
 138
 139     def ColumnNameAndNumber(_hidden_self, **args):
 140         number = int(_hidden_self.Args[0].Calc(**args))
 141         _hidden_self.varname = "_column_%i" % number
 142         return _hidden_self.varname, number
 143
 144     def __str__(self):
 145         return self.varname
 146
 147     def Calc(_hidden_self, **args):
 148         return args[_hidden_self.varname]
 149
# the default math tree functions extended by the column function;
# used as the default function list of the dataparser
MathTreeFuncsWithCol = mathtree.DefaultMathTreeFuncs + [MathTreeFuncCol]
 151
 152
 153 class columntree:
 154
 155     def __init__(self, tree):
 156         self.tree = tree
 157         self.Calc = tree.Calc
 158         self.__str__ = tree.__str__
 159
 160     def VarList(self):
 161         # returns a list of regular variables (strings) like the original mathtree
 162         return [var for var in self.tree.VarList() if not isinstance(var, MathTreeFuncCol) and var[:8] != "_column_"]
 163
 164     def columndict(_hidden_self, **context):
 165         # returns a dictionary of column names (keys) and column numbers (values)
 166         columndict = {}
 167         for var in _hidden_self.tree.VarList():
 168             if isinstance(var, MathTreeFuncCol):
 169                 name, number = var.ColumnNameAndNumber(**context)
 170                 columndict[name] = number
 171             elif var[:8] == "_column_":
 172                 columndict[var] = int(var[8:])
 173         return columndict
 174
 175
 176 class dataparser(mathtree.parser):
 177     # mathtree parser enhanced by column handling
 178     # parse returns a columntree instead of a regular tree
 179
 180     def __init__(self, MathTreeFuncs=MathTreeFuncsWithCol, **kwargs):
 181         mathtree.parser.__init__(self, MathTreeFuncs=MathTreeFuncs, **kwargs)
 182
 183     def parse(self, expr):
 184         return columntree(mathtree.parser.parse(self, expr.replace("$", "_column_")))
 185
 186 ##############################################################
 187
 188
 189 class copycolumn:
 190     # a helper storage class to mark a new column to copied
 191     # out of data from an old column
 192     def __init__(self, newcolumntitle, oldcolumnnumber):
 193         self.newcolumntitle = newcolumntitle
 194         self.oldcolumnnumber = oldcolumnnumber
 195
 196 class mathcolumn:
 197     """a helper storage class to mark a new column to created
 198     by evaluating a mathematical expression"""
 199     def __init__(self, newcolumntitle, expression, tree, varitems):
 200         # - expression is a string
 201         # - tree is a parsed mathematical tree, e.g. we can have
 202         #   call tree.Calc(**vars), where the dict vars maps variable
 203         #   names to values
 204         # - varitems is a list of (key, value) pairs, where the key
 205         #   stands is a variable name in the mathematical tree and
 206         #   the value is its value"""
 207         self.newcolumntitle = newcolumntitle
 208         self.expression = expression
 209         self.tree = tree
 210         self.varitems = varitems
 211
 212 class notitle:
 213     """this is a helper class to mark, that no title was privided
 214     (since a title equals None is a valid input, it needs to be
 215     distinguished from providing no title when a title will be
 216     created automatically)"""
 217     pass
 218
 219 class data(_data):
 220     "creates a new data set out of an existing data set"
 221
 222     def __init__(self, data, title=notitle, parser=dataparser(), context={}, **columns):
 223         defaultstyle = data.defaultstyle
 224
 225         # build a nice title
 226         if title is notitle:
 227             items = columns.items()
 228             items.sort() # we want sorted items (otherwise they would be unpredictable scrambled)
 229             self.title = data.title + ": " + ", ".join(["%s=%s" % item for item in items])
 230         else:
 231             self.title = title
 232
 233         # analyse the **columns argument
 234         newcolumns = []
 235         hasmathcolumns = 0
 236         for newcolumntitle, columnexpr in columns.items():
 237             try:
 238                 # try if it is a valid column identifier
 239                 oldcolumnnumber = data.getcolumnnumber(columnexpr)
 240             except:
 241                 # if not it should be a mathematical expression
 242                 tree = parser.parse(columnexpr)
 243                 columndict = tree.columndict(**context)
 244                 for var in tree.VarList():
 245                     try:
 246                         columndict[var] = data.getcolumnnumber(var)
 247                     except KeyError, e:
 248                         if var not in context.keys():
 249                             raise e
 250                 newcolumns.append(mathcolumn(newcolumntitle, columnexpr, tree, columndict.items()))
 251                 hasmathcolumns = 1
 252             else:
 253                 newcolumns.append(copycolumn(newcolumntitle, oldcolumnnumber))
 254
 255         # ensure to copy the zeroth column (line number)
 256         # if we already do, place it first again, otherwise add it to the front
 257         i = 0
 258         for newcolumn in newcolumns:
 259             if isinstance(newcolumn, copycolumn) and not newcolumn.oldcolumnnumber:
 260                 newcolumns.pop(i)
 261                 newcolumns.insert(0, newcolumn)
 262                 firstcolumnwithtitle = 0
 263                 break
 264             i += 1
 265         else:
 266             newcolumns.insert(0, copycolumn(None, 0))
 267             firstcolumnwithtitle = 1
 268
 269         if hasmathcolumns:
 270             # new column data needs to be calculated
 271             vars = context.copy() # do not modify context, use a copy vars instead
 272             self.points = [None]*len(data.points)
 273             countcolumns = len(newcolumns)
 274             for i in xrange(len(data.points)):
 275                 datapoint = data.points[i]
 276                 point = [None]*countcolumns
 277                 newcolumnnumber = 0
 278                 for newcolumn in newcolumns:
 279                     if isinstance(newcolumn, copycolumn):
 280                         point[newcolumnnumber] = datapoint[newcolumn.oldcolumnnumber]
 281                     else:
 282                         # update the vars
 283                         # TODO: we could update it once for all varitems
 284                         for newcolumntitle, value in newcolumn.varitems:
 285                             vars[newcolumntitle] = datapoint[value]
 286                         point[newcolumnnumber] = newcolumn.tree.Calc(**vars)
 287                         # we could also do:
 288                         # point[newcolumnnumber] = eval(str(newcolumn.tree), vars)
 289                     newcolumnnumber += 1
 290                 self.points[i] = point
 291
 292             # store the column titles
 293             self.columns = {}
 294             newcolumnnumber = firstcolumnwithtitle
 295             for newcolumn in newcolumns[firstcolumnwithtitle:]:
 296                 self.columns[newcolumn.newcolumntitle] = newcolumnnumber
 297                 newcolumnnumber += 1
 298         else:
 299             # since only column copies are needed, we can share the original points
 300             self.points = data.points
 301
 302             # store the new column titles
 303             self.columns = {}
 304             for newcolumn in newcolumns[firstcolumnwithtitle:]:
 305                 self.columns[newcolumn.newcolumntitle] = newcolumn.oldcolumnnumber
 306
 307
# cache of the data read by the file class: maps the cache key created by
# file.getcachekey (a string) to the list instance built out of the file
filecache = {}
 309
 310 class file(data):
 311
 312     defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
 313     defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
 314     defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")
 315
 316     def splitline(self, line, stringpattern, columnpattern, tofloat=1):
 317         """returns a tuple created out of the string line
 318         - matches stringpattern and columnpattern, adds the first group of that
 319           match to the result and and removes those matches until the line is empty
 320         - when stringpattern matched, the result is always kept as a string
 321         - when columnpattern matched and tofloat is true, a conversion to a float
 322           is tried; when this conversion fails, the string is kept"""
 323         result = []
 324         # try to gain speed by skip matching regular expressions
 325         if line.find('"')!=-1 or \
 326            stringpattern is not self.defaultstringpattern or \
 327            columnpattern is not self.defaultcolumnpattern:
 328             while len(line):
 329                 match = stringpattern.match(line)
 330                 if match:
 331                     result.append(match.groups()[0])
 332                     line = line[match.end():]
 333                 else:
 334                     match = columnpattern.match(line)
 335                     if tofloat:
 336                         try:
 337                             result.append(float(match.groups()[0]))
 338                         except (TypeError, ValueError):
 339                             result.append(match.groups()[0])
 340                     else:
 341                         result.append(match.groups()[0])
 342                     line = line[match.end():]
 343         else:
 344             if tofloat:
 345                 try:
 346                     return map(float, line.split())
 347                 except (TypeError, ValueError):
 348                     result = []
 349                     for r in line.split():
 350                         try:
 351                             result.append(float(r))
 352                         except (TypeError, ValueError):
 353                             result.append(r)
 354             else:
 355                 return line.split()
 356         return result
 357
 358     def getcachekey(self, *args):
 359         return ":".join([str(x) for x in args])
 360
 361     def __init__(self, filename,
 362                        commentpattern=defaultcommentpattern,
 363                        stringpattern=defaultstringpattern,
 364                        columnpattern=defaultcolumnpattern,
 365                        skiphead=0, skiptail=0, every=1,
 366                        **kwargs):
 367         cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
 368         if not filecache.has_key(cachekey):
 369             file = open(filename)
 370             self.title = filename
 371             columns = {}
 372             points = []
 373             linenumber = 0
 374             maxcolumns = 0
 375             for line in file.readlines():
 376                 line = line.strip()
 377                 match = commentpattern.match(line)
 378                 if match:
 379                     if not len(points):
 380                         keys = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
 381                         i = 0
 382                         for key in keys:
 383                             i += 1
 384                             columns[key] = i
 385                 else:
 386                     linedata = []
 387                     for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
 388                         linedata.append(value)
 389                     if len(linedata):
 390                         if linenumber >= skiphead and not ((linenumber - skiphead) % every):
 391                             linedata = [linenumber + 1] + linedata
 392                             if len(linedata) > maxcolumns:
 393                                 maxcolumns = len(linedata)
 394                             points.append(linedata)
 395                         linenumber += 1
 396             if skiptail:
 397                 del points[-skiptail:]
 398             filecache[cachekey] = list(points, title=filename, maxcolumns=maxcolumns, addlinenumbers=0, **columns)
 399         data.__init__(self, filecache[cachekey], **kwargs)
 400
 401
# cache of the data read by the conffile class: maps the filename to the
# list instance built out of the config-like file
conffilecache = {}
 403
 404 class conffile(data):
 405
 406     def __init__(self, filename, **kwargs):
 407         """read data from a config-like file
 408         - filename is a string
 409         - each row is defined by a section in the config-like file (see
 410           config module description)
 411         - the columns for each row are defined by lines in the section file;
 412           the option entries identify and name the columns
 413         - further keyword arguments are passed to the constructor of data,
 414           keyword arguments data and titles excluded"""
 415         cachekey = filename
 416         if not filecache.has_key(cachekey):
 417             config = ConfigParser.ConfigParser()
 418             config.optionxform = str
 419             config.readfp(open(filename, "r"))
 420             sections = config.sections()
 421             sections.sort()
 422             points = [None]*len(sections)
 423             maxcolumns = 1
 424             columns = {}
 425             for i in xrange(len(sections)):
 426                 point = [sections[i]] + [None]*(maxcolumns-1)
 427                 for option in config.options(sections[i]):
 428                     value = config.get(sections[i], option)
 429                     try:
 430                         value = float(value)
 431                     except:
 432                         pass
 433                     try:
 434                         index = columns[option]
 435                     except KeyError:
 436                         columns[option] = maxcolumns
 437                         point.append(value)
 438                         maxcolumns += 1
 439                     else:
 440                         point[index] = value
 441                 points[i] = point
 442             conffilecache[cachekey] = list(points, title=filename, maxcolumns=maxcolumns, addlinenumbers=0, **columns)
 443         data.__init__(self, conffilecache[cachekey], **kwargs)
 444
 445
 446
 447 class function:
 448
 449     defaultstyle = style.line()
 450
 451     def __init__(self, expression, title=notitle, min=None, max=None, points=100, parser=mathtree.parser(), context={}):
 452         if title is notitle:
 453             self.title = expression
 454         else:
 455             self.title = title
 456         self.min = min
 457         self.max = max
 458         self.nopoints = points
 459         self.context = context
 460         self.result, expression = [x.strip() for x in expression.split("=")]
 461         self.mathtree = parser.parse(expression)
 462         self.variable = None
 463
 464     def setstyle(self, graph, style):
 465         self.style = style
 466         for variable in self.mathtree.VarList():
 467             if variable in graph.axes.keys():
 468                 if self.variable is None:
 469                     self.variable = variable
 470                 else:
 471                     raise ValueError("multiple variables found")
 472         if self.variable is None:
 473             raise ValueError("no variable found")
 474         self.xaxis = graph.axes[self.variable]
 475         unhandledcolumns = self.style.setdata(graph, {self.variable: 0, self.result: 1}, self)
 476         unhandledcolumnkeys = unhandledcolumns.keys()
 477         if len(unhandledcolumnkeys):
 478             raise ValueError("style couldn't handle column keys %s" % unhandledcolumnkeys)
 479
 480     def selectstyle(self, graph, selectindex, selecttotal):
 481         self.style.selectstyle(selectindex, selecttotal, self)
 482
 483     def adjustaxes(self, graph, step):
 484         """
 485         - on step == 0 axes with fixed data should be adjusted
 486         - on step == 1 the current axes ranges might be used to
 487           calculate further data (e.g. y data for a function y=f(x)
 488           where the y range depends on the x range)
 489         - on step == 2 axes ranges not previously set should be
 490           updated by data accumulated by step 1"""
 491         if step == 0:
 492             min, max = graph.axes[self.variable].getrange()
 493             if self.min is not None: min = self.min
 494             if self.max is not None: max = self.max
 495             vmin = self.xaxis.convert(min)
 496             vmax = self.xaxis.convert(max)
 497             self.points = []
 498             for i in range(self.nopoints):
 499                 x = self.xaxis.invert(vmin + (vmax-vmin)*i / (self.nopoints-1.0))
 500                 self.points.append([x])
 501             self.style.adjustaxes([0], self)
 502         elif step == 1:
 503             for point in self.points:
 504                 self.context[self.variable] = point[0]
 505                 try:
 506                     point.append(self.mathtree.Calc(**self.context))
 507                 except (ArithmeticError, ValueError):
 508                     point.append(None)
 509         elif step == 2:
 510             self.style.adjustaxes([1], self)
 511
 512     def draw(self, graph):
 513         self.style.drawpoints(graph, self)
 514
 515
 516 class paramfunction:
 517
 518     defaultstyle = style.line()
 519
 520     def __init__(self, varname, min, max, expression, title=notitle, points=100, parser=mathtree.parser(), context={}):
 521         if title is notitle:
 522             self.title = expression
 523         else:
 524             self.title = title
 525         self.varname = varname
 526         self.min = min
 527         self.max = max
 528         self.nopoints = points
 529         self.expression = {}
 530         self.mathtrees = {}
 531         varlist, expressionlist = expression.split("=")
 532         keys = varlist.split(",")
 533         mtrees = parser.parse(expressionlist)
 534         if len(keys) != len(mtrees):
 535             raise ValueError("unpack tuple of wrong size")
 536         for i in range(len(keys)):
 537             key = keys[i].strip()
 538             if self.mathtrees.has_key(key):
 539                 raise ValueError("multiple assignment in tuple")
 540             self.mathtrees[key] = mtrees[i]
 541         if len(keys) != len(self.mathtrees.keys()):
 542             raise ValueError("unpack tuple of wrong size")
 543         self.points = []
 544         for i in range(self.nopoints):
 545             context[self.varname] = self.min + (self.max-self.min)*i / (self.nopoints-1.0)
 546             line = []
 547             for key, tree in self.mathtrees.items():
 548                 line.append(tree.Calc(**context))
 549             self.points.append(line)
 550
 551     def setstyle(self, graph, style):
 552         self.style = style
 553         columns = {}
 554         index = 0
 555         for key in self.mathtrees.keys():
 556             columns[key] = index
 557             index += 1
 558         unhandledcolumns = self.style.setdata(graph, columns, self)
 559         unhandledcolumnkeys = unhandledcolumns.keys()
 560         if len(unhandledcolumnkeys):
 561             raise ValueError("style couldn't handle column keys %s" % unhandledcolumnkeys)
 562
 563     def selectstyle(self, graph, selectindex, selecttotal):
 564         self.style.selectstyle(selectindex, selecttotal, self)
 565
 566     def adjustaxes(self, graph, step):
 567         if step == 0:
 568             self.style.adjustaxes(range(len(self.mathtrees.items())), self)
 569
 570     def draw(self, graph):
 571         self.style.drawpoints(graph, self)
 572