pyx/graph/data.py

   1 #!/usr/bin/env python
   2 # -*- coding: ISO-8859-1 -*-
   3 #
   4 #
   5 # Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
   6 # Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
   7 # Copyright (C) 2002-2004 André Wobst <wobsta@users.sourceforge.net>
   8 #
   9 # This file is part of PyX (http://pyx.sourceforge.net/).
  10 #
  11 # PyX is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15 #
  16 # PyX is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20 #
  21 # You should have received a copy of the GNU General Public License
  22 # along with PyX; if not, write to the Free Software
  23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24
  25
  26 import re, ConfigParser
  27 from pyx import mathtree
  28 from pyx.graph import style
  29
  30
  31 class _Idata:
  32     """interface definition of a data object
  33     data objects store data arranged in rows and columns"""
  34
  35     columns = {}
  36     """a dictionary mapping column titles to column numbers"""
  37
  38     points = []
  39     """column/row data
  40     - a list of rows where each row represents a data point
  41     - each row contains a list, where each entry of the list represents a value for a column
  42     - the number of columns for each data point must match the number of columns
  43     - any column enty of any data point might be a float, a string, or None"""
  44
  45     title = ""
  46     """a string (for printing in PyX, e.g. in a graph key)
  47     - None is allowed, which marks the data instance to have no title,
  48       e.g. it should be skiped in a graph key etc.
  49     - the title does need to be unique"""
  50
  51     def getcolumnnumber(self, column):
  52         """returns a column number
  53         - the column parameter might be an integer to be used as a column number
  54         - a column number must be a valid list index (negative values are allowed)
  55         - the column parameter might be a string contained in the columns list;
  56           to be valid, the string must be unique within the columns list"""
  57
  58     def getcolumn(self, column):
  59         """returns a column
  60         - extracts a column out of self.data and returns it as a list
  61         - the column is identified by the parameter column as in getcolumnnumber"""
  62
  63
  64 class styledata:
  65     """instances of this class are used to store data from the style(s)
  66     and to pass point data to the style(s) -- this storrage class is shared
  67     between all the style(s) in use by a data instance"""
  68     pass
  69
  70
  71 class _data:
  72
  73     defaultstyle = style.symbol()
  74
  75     def getcolumnnumber(self, key):
  76         try:
  77             key + ""
  78         except:
  79             return key + 0
  80         else:
  81             return self.columns[key.strip()]
  82
  83     def getcolumn(self, key):
  84         columnno = self.getcolumnnumber(key)
  85         return [point[columnno] for point in self.points]
  86
  87     def setstyles(self, graph, styles):
  88         provided = []
  89         addstyles = [] # a list of style instances to be added
  90         for s in styles:
  91             for need in s.need:
  92                 if need not in provided:
  93                     for addstyle in addstyles:
  94                         if need in addstyle.provide:
  95                             break
  96                     else:
  97                         addstyles.append(style.provider[need])
  98             provided.extend(s.provide)
  99
 100         self.styles = addstyles + styles
 101         self.styledata = styledata()
 102
 103         columns = self.columns.keys()
 104         usedcolumns = []
 105         for s in self.styles:
 106             usedcolumns.extend(s.columns(self.styledata, graph, columns))
 107         for column in columns:
 108             if column not in usedcolumns:
 109                 raise ValueError("unused column '%s'" % column)
 110
 111     def selectstyle(self, graph, selectindex, selecttotal):
 112         for style in self.styles:
 113             style.selectstyle(self.styledata, graph, selectindex, selecttotal)
 114
 115     def adjustaxes(self, graph, step):
 116         """
 117         - on step == 0 axes with fixed data should be adjusted
 118         - on step == 1 the current axes ranges might be used to
 119           calculate further data (e.g. y data for a function y=f(x)
 120           where the y range depends on the x range)
 121         - on step == 2 axes ranges not previously set should be
 122           updated by data accumulated by step 1"""
 123         if step == 0:
 124             for key, value in self.columns.items():
 125                 for style in self.styles:
 126                     style.adjustaxis(self.styledata, graph, key, self.points, value)
 127
 128     def draw(self, graph):
 129         columnsitems = self.columns.items()
 130         self.styledata.point = {}
 131         for style in self.styles:
 132             style.initdrawpoints(self.styledata, graph)
 133         for point in self.points:
 134             for key, value in columnsitems:
 135                 self.styledata.point[key] = point[value]
 136             for style in self.styles:
 137                 style.drawpoint(self.styledata, graph)
 138         for style in self.styles:
 139             style.donedrawpoints(self.styledata, graph)
 140
 141
 142 class list(_data):
 143     "creates data out of a list"
 144
 145     def checkmaxcolumns(self, points, maxcolumns=None):
 146         if maxcolumns is None:
 147             maxcolumns = max([len(point) for point in points])
 148         for i in xrange(len(points)):
 149             l = len(points[i])
 150             if l < maxcolumns:
 151                 try:
 152                     p = points[i] + [None] * (maxcolumns - l)
 153                 except:
 154                     # points[i] are not a list
 155                     p = __builtins__.list(points[i]) + [None] * (maxcolumns - l)
 156                 try:
 157                     points[i] = p
 158                 except:
 159                     # points are not a list -> end loop without step into else
 160                     break
 161         else:
 162             # the loop finished successfull
 163             return points
 164         # since points are not a list, convert them and try again
 165         return checkmaxcolumns(__builtins__.list(points), maxcolumns=maxcolumns)
 166
 167     def __init__(self, points, title="user provided list", maxcolumns=None, addlinenumbers=1, **columns):
 168         points = self.checkmaxcolumns(points, maxcolumns)
 169         if addlinenumbers:
 170             for i in xrange(len(points)):
 171                 try:
 172                     points[i].insert(0, i+1)
 173                 except:
 174                     points[i] = [i+1] + __builtins__.list(points[i])
 175         self.points = points
 176         self.columns = columns
 177         self.title = title
 178
 179
 180 ##############################################################
 181 # math tree enhanced by column handling
 182 ##############################################################
 183
class MathTreeFuncCol(mathtree.MathTreeFunc1):
    """Math tree function "_column_" taking the column number as its
    argument.

    The value of the function is not calculated from its argument, but
    looked up in the variables passed to Calc under the name created by
    ColumnNameAndNumber."""

    def __init__(self, *args):
        mathtree.MathTreeFunc1.__init__(self, "_column_", *args)

    def VarList(self):
        # we misuse VarList here:
        # - instead of returning a string, we return this instance itself
        # - before calculating the expression, you must call ColumnNameAndNumber
        #   once (when limiting the context to external defined variables,
        #   otherwise you have to call it each time)
        return [self]

    # NOTE(review): the first parameter is presumably called _hidden_self
    # to allow for a variable named "self" within **args -- confirm
    def ColumnNameAndNumber(_hidden_self, **args):
        """Evaluate the argument to an integer column number, store the
        variable name "_column_<number>" in self.varname and return the
        tuple (variable name, column number)."""
        number = int(_hidden_self.Args[0].Calc(**args))
        _hidden_self.varname = "_column_%i" % number
        return _hidden_self.varname, number

    def __str__(self):
        # varname is set by ColumnNameAndNumber only
        return self.varname

    def Calc(_hidden_self, **args):
        """Return the value stored in args under the variable name set by
        ColumnNameAndNumber."""
        return args[_hidden_self.varname]

# the default math tree functions extended by the column function
MathTreeFuncsWithCol = mathtree.DefaultMathTreeFuncs + [MathTreeFuncCol]
 209
 210
 211 class columntree:
 212
 213     def __init__(self, tree):
 214         self.tree = tree
 215         self.Calc = tree.Calc
 216         self.__str__ = tree.__str__
 217
 218     def VarList(self):
 219         # returns a list of regular variables (strings) like the original mathtree
 220         return [var for var in self.tree.VarList() if not isinstance(var, MathTreeFuncCol) and var[:8] != "_column_"]
 221
 222     def columndict(_hidden_self, **context):
 223         # returns a dictionary of column names (keys) and column numbers (values)
 224         columndict = {}
 225         for var in _hidden_self.tree.VarList():
 226             if isinstance(var, MathTreeFuncCol):
 227                 name, number = var.ColumnNameAndNumber(**context)
 228                 columndict[name] = number
 229             elif var[:8] == "_column_":
 230                 columndict[var] = int(var[8:])
 231         return columndict
 232
 233
class dataparser(mathtree.parser):
    """mathtree parser enhanced by column handling

    parse returns a columntree instead of a regular tree"""

    def __init__(self, MathTreeFuncs=MathTreeFuncsWithCol, **kwargs):
        # by default, the column function is available beside the
        # default math tree functions
        mathtree.parser.__init__(self, MathTreeFuncs=MathTreeFuncs, **kwargs)

    def parse(self, expr):
        """Parse expr and return the result wrapped into a columntree;
        each "$" within expr is replaced by "_column_" before parsing."""
        return columntree(mathtree.parser.parse(self, expr.replace("$", "_column_")))
 243
 244 ##############################################################
 245
 246
 247 class copycolumn:
 248     # a helper storage class to mark a new column to copied
 249     # out of data from an old column
 250     def __init__(self, newcolumntitle, oldcolumnnumber):
 251         self.newcolumntitle = newcolumntitle
 252         self.oldcolumnnumber = oldcolumnnumber
 253
 254 class mathcolumn:
 255     """a helper storage class to mark a new column to created
 256     by evaluating a mathematical expression"""
 257     def __init__(self, newcolumntitle, expression, tree, varitems):
 258         # - expression is a string
 259         # - tree is a parsed mathematical tree, e.g. we can have
 260         #   call tree.Calc(**vars), where the dict vars maps variable
 261         #   names to values
 262         # - varitems is a list of (key, value) pairs, where the key
 263         #   stands is a variable name in the mathematical tree and
 264         #   the value is its value"""
 265         self.newcolumntitle = newcolumntitle
 266         self.expression = expression
 267         self.tree = tree
 268         self.varitems = varitems
 269
 270 class notitle:
 271     """this is a helper class to mark, that no title was privided
 272     (since a title equals None is a valid input, it needs to be
 273     distinguished from providing no title when a title will be
 274     created automatically)"""
 275     pass
 276
 277 class data(_data):
 278     "creates a new data set out of an existing data set"
 279
 280     def __init__(self, data, title=notitle, parser=dataparser(), context={}, **columns):
 281         defaultstyle = data.defaultstyle
 282
 283         # build a nice title
 284         if title is notitle:
 285             items = columns.items()
 286             items.sort() # we want sorted items (otherwise they would be unpredictable scrambled)
 287             self.title = data.title + ": " + ", ".join(["%s=%s" % item for item in items])
 288         else:
 289             self.title = title
 290
 291         # analyse the **columns argument
 292         newcolumns = []
 293         hasmathcolumns = 0
 294         for newcolumntitle, columnexpr in columns.items():
 295             try:
 296                 # try if it is a valid column identifier
 297                 oldcolumnnumber = data.getcolumnnumber(columnexpr)
 298             except:
 299                 # if not it should be a mathematical expression
 300                 tree = parser.parse(columnexpr)
 301                 columndict = tree.columndict(**context)
 302                 for var in tree.VarList():
 303                     try:
 304                         columndict[var] = data.getcolumnnumber(var)
 305                     except KeyError, e:
 306                         if var not in context.keys():
 307                             raise e
 308                 newcolumns.append(mathcolumn(newcolumntitle, columnexpr, tree, columndict.items()))
 309                 hasmathcolumns = 1
 310             else:
 311                 newcolumns.append(copycolumn(newcolumntitle, oldcolumnnumber))
 312
 313         # ensure to copy the zeroth column (line number)
 314         # if we already do, place it first again, otherwise add it to the front
 315         i = 0
 316         for newcolumn in newcolumns:
 317             if isinstance(newcolumn, copycolumn) and not newcolumn.oldcolumnnumber:
 318                 newcolumns.pop(i)
 319                 newcolumns.insert(0, newcolumn)
 320                 firstcolumnwithtitle = 0
 321                 break
 322             i += 1
 323         else:
 324             newcolumns.insert(0, copycolumn(None, 0))
 325             firstcolumnwithtitle = 1
 326
 327         if hasmathcolumns:
 328             # new column data needs to be calculated
 329             vars = context.copy() # do not modify context, use a copy vars instead
 330             self.points = [None]*len(data.points)
 331             countcolumns = len(newcolumns)
 332             for i in xrange(len(data.points)):
 333                 datapoint = data.points[i]
 334                 point = [None]*countcolumns
 335                 newcolumnnumber = 0
 336                 for newcolumn in newcolumns:
 337                     if isinstance(newcolumn, copycolumn):
 338                         point[newcolumnnumber] = datapoint[newcolumn.oldcolumnnumber]
 339                     else:
 340                         # update the vars
 341                         # TODO: we could update it once for all varitems
 342                         for newcolumntitle, value in newcolumn.varitems:
 343                             vars[newcolumntitle] = datapoint[value]
 344                         point[newcolumnnumber] = newcolumn.tree.Calc(**vars)
 345                         # we could also do:
 346                         # point[newcolumnnumber] = eval(str(newcolumn.tree), vars)
 347                     newcolumnnumber += 1
 348                 self.points[i] = point
 349
 350             # store the column titles
 351             self.columns = {}
 352             newcolumnnumber = firstcolumnwithtitle
 353             for newcolumn in newcolumns[firstcolumnwithtitle:]:
 354                 self.columns[newcolumn.newcolumntitle] = newcolumnnumber
 355                 newcolumnnumber += 1
 356         else:
 357             # since only column copies are needed, we can share the original points
 358             self.points = data.points
 359
 360             # store the new column titles
 361             self.columns = {}
 362             for newcolumn in newcolumns[firstcolumnwithtitle:]:
 363                 self.columns[newcolumn.newcolumntitle] = newcolumn.oldcolumnnumber
 364
 365
 366 filecache = {}
 367
 368 class file(data):
 369
 370     defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
 371     defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
 372     defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")
 373
 374     def splitline(self, line, stringpattern, columnpattern, tofloat=1):
 375         """returns a tuple created out of the string line
 376         - matches stringpattern and columnpattern, adds the first group of that
 377           match to the result and and removes those matches until the line is empty
 378         - when stringpattern matched, the result is always kept as a string
 379         - when columnpattern matched and tofloat is true, a conversion to a float
 380           is tried; when this conversion fails, the string is kept"""
 381         result = []
 382         # try to gain speed by skip matching regular expressions
 383         if line.find('"')!=-1 or \
 384            stringpattern is not self.defaultstringpattern or \
 385            columnpattern is not self.defaultcolumnpattern:
 386             while len(line):
 387                 match = stringpattern.match(line)
 388                 if match:
 389                     result.append(match.groups()[0])
 390                     line = line[match.end():]
 391                 else:
 392                     match = columnpattern.match(line)
 393                     if tofloat:
 394                         try:
 395                             result.append(float(match.groups()[0]))
 396                         except (TypeError, ValueError):
 397                             result.append(match.groups()[0])
 398                     else:
 399                         result.append(match.groups()[0])
 400                     line = line[match.end():]
 401         else:
 402             if tofloat:
 403                 try:
 404                     return map(float, line.split())
 405                 except (TypeError, ValueError):
 406                     result = []
 407                     for r in line.split():
 408                         try:
 409                             result.append(float(r))
 410                         except (TypeError, ValueError):
 411                             result.append(r)
 412             else:
 413                 return line.split()
 414         return result
 415
 416     def getcachekey(self, *args):
 417         return ":".join([str(x) for x in args])
 418
 419     def __init__(self, filename,
 420                        commentpattern=defaultcommentpattern,
 421                        stringpattern=defaultstringpattern,
 422                        columnpattern=defaultcolumnpattern,
 423                        skiphead=0, skiptail=0, every=1,
 424                        **kwargs):
 425         cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
 426         if not filecache.has_key(cachekey):
 427             file = open(filename)
 428             self.title = filename
 429             columns = {}
 430             points = []
 431             linenumber = 0
 432             maxcolumns = 0
 433             for line in file.readlines():
 434                 line = line.strip()
 435                 match = commentpattern.match(line)
 436                 if match:
 437                     if not len(points):
 438                         keys = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
 439                         i = 0
 440                         for key in keys:
 441                             i += 1
 442                             columns[key] = i
 443                 else:
 444                     linedata = []
 445                     for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
 446                         linedata.append(value)
 447                     if len(linedata):
 448                         if linenumber >= skiphead and not ((linenumber - skiphead) % every):
 449                             linedata = [linenumber + 1] + linedata
 450                             if len(linedata) > maxcolumns:
 451                                 maxcolumns = len(linedata)
 452                             points.append(linedata)
 453                         linenumber += 1
 454             if skiptail:
 455                 del points[-skiptail:]
 456             filecache[cachekey] = list(points, title=filename, maxcolumns=maxcolumns, addlinenumbers=0, **columns)
 457         data.__init__(self, filecache[cachekey], **kwargs)
 458
 459
 460 conffilecache = {}
 461
 462 class conffile(data):
 463
 464     def __init__(self, filename, **kwargs):
 465         """read data from a config-like file
 466         - filename is a string
 467         - each row is defined by a section in the config-like file (see
 468           config module description)
 469         - the columns for each row are defined by lines in the section file;
 470           the option entries identify and name the columns
 471         - further keyword arguments are passed to the constructor of data,
 472           keyword arguments data and titles excluded"""
 473         cachekey = filename
 474         if not filecache.has_key(cachekey):
 475             config = ConfigParser.ConfigParser()
 476             config.optionxform = str
 477             config.readfp(open(filename, "r"))
 478             sections = config.sections()
 479             sections.sort()
 480             points = [None]*len(sections)
 481             maxcolumns = 1
 482             columns = {}
 483             for i in xrange(len(sections)):
 484                 point = [sections[i]] + [None]*(maxcolumns-1)
 485                 for option in config.options(sections[i]):
 486                     value = config.get(sections[i], option)
 487                     try:
 488                         value = float(value)
 489                     except:
 490                         pass
 491                     try:
 492                         index = columns[option]
 493                     except KeyError:
 494                         columns[option] = maxcolumns
 495                         point.append(value)
 496                         maxcolumns += 1
 497                     else:
 498                         point[index] = value
 499                 points[i] = point
 500             conffilecache[cachekey] = list(points, title=filename, maxcolumns=maxcolumns, addlinenumbers=0, **columns)
 501         data.__init__(self, conffilecache[cachekey], **kwargs)
 502
 503
 504
class function:
    """Data created by evaluating an expression "result = f(variable)"
    at points along the axis named by the variable.

    NOTE(review): the style methods called by this class (setdata,
    adjustaxes and the argument order of selectstyle, initdrawpoints,
    drawpoint and donedrawpoints) differ from those called by _data
    (columns, adjustaxis, styledata passed first) -- presumably one of
    the two is out of date; confirm against the style module."""

    defaultstyle = style.line()

    def __init__(self, expression, title=notitle, min=None, max=None,
    points=100, parser=mathtree.parser(), context={}):
        """
        - expression is a string "result = f(variable)"; it needs to
          contain exactly one "="
        - title defaults to the expression
        - min and max limit the range of the variable (None means to
          use the axis range)
        - points is the number of points to be calculated
        - parser is used to parse the right hand side of the expression
        - context provides further variables for the expression"""

        if title is notitle:
            self.title = expression
        else:
            self.title = title
        self.min = min
        self.max = max
        self.numberofpoints = points
        self.context = context.copy() # be safe on late evaluations
        self.result, expression = [x.strip() for x in expression.split("=")]
        self.mathtree = parser.parse(expression)
        self.variable = None # set by setstyles

    def setstyles(self, graph, styles):
        """Store the styles, identify the variable of the expression and
        let the styles handle the columns.

        The variable is the one variable of the expression, which is the
        name of an axis of the graph. A ValueError is raised when there is
        no or more than one such variable or when columns are left
        unhandled by the styles."""
        self.styles = styles
        self.styledata = styledata()
        for variable in self.mathtree.VarList():
            if variable in graph.axes.keys():
                if self.variable is None:
                    self.variable = variable
                else:
                    raise ValueError("multiple variables found")
        if self.variable is None:
            raise ValueError("no variable found")
        self.xaxis = graph.axes[self.variable]
        self.columns = {self.variable: 1, self.result: 2}
        # each style returns the columns it did not handle
        unhandledcolumns = self.columns
        for style in self.styles:
            unhandledcolumns = style.setdata(graph, unhandledcolumns, self.styledata)
        unhandledcolumnkeys = unhandledcolumns.keys()
        if len(unhandledcolumnkeys):
            raise ValueError("style couldn't handle column keys %s" % unhandledcolumnkeys)

    def selectstyle(self, graph, selectindex, selecttotal):
        """Forward the style selection to all styles."""
        for style in self.styles:
            style.selectstyle(selectindex, selecttotal, self.styledata)

    def adjustaxes(self, graph, step):
        """
        - on step == 0 axes with fixed data should be adjusted
        - on step == 1 the current axes ranges might be used to
          calculate further data (e.g. y data for a function y=f(x)
          where the y range depends on the x range)
        - on step == 2 axes ranges not previously set should be
          updated by data accumulated by step 1"""
        if step == 0:
            # min and max (when given) are passed as values of column 1
            self.points = []
            if self.min is not None:
                self.points.append([None, self.min])
            if self.max is not None:
                self.points.append([None, self.max])
            for style in self.styles:
                style.adjustaxes(self.points, [1], self.styledata)
        elif step == 1:
            min, max = graph.axes[self.variable].getrange()
            if self.min is not None: min = self.min
            if self.max is not None: max = self.max
            vmin = self.xaxis.convert(min)
            vmax = self.xaxis.convert(max)
            # the points are equally spaced between the converted range
            # boundaries; each point is [v, x, result]
            self.points = []
            for i in range(self.numberofpoints):
                # NOTE(review): division by zero for a single point
                v = vmin + (vmax-vmin)*i / (self.numberofpoints-1.0)
                x = self.xaxis.invert(v)
                # caution: the virtual coordinate might differ once
                # the axis rescales itself to include further ticks etc.
                self.points.append([v, x, None])
            for point in self.points:
                self.context[self.variable] = point[1]
                try:
                    point[2] = self.mathtree.Calc(**self.context)
                except (ArithmeticError, ValueError):
                    # the result stays None when it can not be calculated
                    pass
        elif step == 2:
            for style in self.styles:
                style.adjustaxes(self.points, [2], self.styledata)

    def draw(self, graph):
        """Pass the points one by one to all styles."""
        # TODO code duplication
        for style in self.styles:
            style.initdrawpoints(graph, self.styledata)
        for point in self.points:
            self.styledata.point = point
            for style in self.styles:
                style.drawpoint(graph, self.styledata)
        for style in self.styles:
            style.donedrawpoints(graph, self.styledata)
 597
 598
 599 class paramfunction:
 600
 601     defaultstyle = style.line()
 602
 603     def __init__(self, varname, min, max, expression, title=notitle, points=100, parser=mathtree.parser(), context={}):
 604         if title is notitle:
 605             self.title = expression
 606         else:
 607             self.title = title
 608         self.varname = varname
 609         self.min = min
 610         self.max = max
 611         self.numberofpoints = points
 612         self.expression = {}
 613         varlist, expressionlist = expression.split("=")
 614         keys = varlist.split(",")
 615         mathtrees = parser.parse(expressionlist)
 616         if len(keys) != len(mathtrees):
 617             raise ValueError("unpack tuple of wrong size")
 618         self.points = [None]*self.numberofpoints
 619         emptyresult = [None]*len(keys)
 620         self.columns = {}
 621         i = 1
 622         for key in keys:
 623             self.columns[key.strip()] = i
 624             i += 1
 625         for i in range(self.numberofpoints):
 626             param = self.min + (self.max-self.min)*i / (self.numberofpoints-1.0)
 627             context[self.varname] = param
 628             self.points[i] = [param] + emptyresult
 629             column = 1
 630             for key, column in self.columns.items():
 631                 self.points[i][column] = mathtrees[column-1].Calc(**context)
 632                 column += 1
 633
 634     def setstyles(self, graph, style):
 635         self.style = style
 636         unhandledcolumns = self.style.setdata(graph, self.columns, self.styledata)
 637         unhandledcolumnkeys = unhandledcolumns.keys()
 638         if len(unhandledcolumnkeys):
 639             raise ValueError("style couldn't handle column keys %s" % unhandledcolumnkeys)
 640
 641     def selectstyle(self, graph, selectindex, selecttotal):
 642         self.style.selectstyle(selectindex, selecttotal, self.styledata)
 643
 644     def adjustaxes(self, graph, step):
 645         if step == 0:
 646             self.style.adjustaxes(self.points, self.columns.values(), self.styledata)
 647
 648     def draw(self, graph):
 649         raise # TODO
 650         self.style.drawpoints(self.points, graph, self.styledata)
 651