pyx/graph/data.py

   1 #!/usr/bin/env python
   2 # -*- coding: ISO-8859-1 -*-
   3 #
   4 #
   5 # Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
   6 # Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
   7 # Copyright (C) 2002-2004 André Wobst <wobsta@users.sourceforge.net>
   8 #
   9 # This file is part of PyX (http://pyx.sourceforge.net/).
  10 #
  11 # PyX is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15 #
  16 # PyX is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20 #
  21 # You should have received a copy of the GNU General Public License
  22 # along with PyX; if not, write to the Free Software
  23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24
  25
  26 import re, ConfigParser
  27 from pyx import mathtree
  28 from pyx.graph import style
  29
  30
  31 class _Idata:
  32     """interface definition of a data object
  33     data objects store data arranged in rows and columns"""
  34
  35     columns = {}
  36     """a dictionary mapping column titles to column numbers"""
  37
  38     points = []
  39     """column/row data
  40     - a list of rows where each row represents a data point
  41     - each row contains a list, where each entry of the list represents a value for a column
  42     - the number of columns for each data point must match the number of columns
  43     - any column enty of any data point might be a float, a string, or None"""
  44
  45     title = ""
  46     """a string (for printing in PyX, e.g. in a graph key)
  47     - None is allowed, which marks the data instance to have no title,
  48       e.g. it should be skiped in a graph key etc.
  49     - the title does need to be unique"""
  50
  51     def getcolumnnumber(self, column):
  52         """returns a column number
  53         - the column parameter might be an integer to be used as a column number
  54         - a column number must be a valid list index (negative values are allowed)
  55         - the column parameter might be a string contained in the columns list;
  56           to be valid, the string must be unique within the columns list"""
  57
  58     def getcolumn(self, column):
  59         """returns a column
  60         - extracts a column out of self.data and returns it as a list
  61         - the column is identified by the parameter column as in getcolumnnumber"""
  62
  63
  64 class _data:
  65
  66     defaultstyle = style.symbol()
  67
  68     def getcolumnnumber(self, key):
  69         try:
  70             key + ""
  71         except:
  72             return key + 0
  73         else:
  74             return self.columns[key.strip()]
  75
  76     def getcolumn(self, key):
  77         columnno = self.getcolumnnumber(key)
  78         return [point[columnno] for point in self.points]
  79
  80     def setstyle(self, graph, style):
  81         self.style = style
  82         unhandledcolumns = self.style.setdata(graph, self.columns, self)
  83         unhandledcolumnkeys = unhandledcolumns.keys()
  84         if len(unhandledcolumnkeys):
  85             raise ValueError("style couldn't handle column keys %s" % unhandledcolumnkeys)
  86
  87     def selectstyle(self, graph, selectindex, selecttotal):
  88         self.style.selectstyle(selectindex, selecttotal, self)
  89
  90     def adjustaxes(self, graph, step):
  91         """
  92         - on step == 0 axes with fixed data should be adjusted
  93         - on step == 1 the current axes ranges might be used to
  94           calculate further data (e.g. y data for a function y=f(x)
  95           where the y range depends on the x range)
  96         - on step == 2 axes ranges not previously set should be
  97           updated by data accumulated by step 1"""
  98         if step == 0:
  99             self.style.adjustaxes(self.columns.values(), self)
 100
 101     def draw(self, graph):
 102         self.style.drawpoints(graph, self)
 103
 104
 105 class list(_data):
 106     "creates data out of a list"
 107
 108     def checkmaxcolumns(self, points, maxcolumns=None):
 109         if maxcolumns is None:
 110             maxcolumns = max([len(point) for point in points])
 111         for i in xrange(len(points)):
 112             l = len(points[i])
 113             if l < maxcolumns:
 114                 try:
 115                     p = points[i] + [None] * (maxcolumns - l)
 116                 except:
 117                     # points[i] are not a list
 118                     p = __builtins__.list(points[i]) + [None] * (maxcolumns - l)
 119                 try:
 120                     points[i] = p
 121                 except:
 122                     # points are not a list -> end loop without step into else
 123                     break
 124         else:
 125             # the loop finished successfull
 126             return points
 127         # since points are not a list, convert them and try again
 128         return checkmaxcolumns(__builtins__.list(points), maxcolumns=maxcolumns)
 129
 130     def __init__(self, points, title="user provided list", maxcolumns=None, addlinenumbers=1, **columns):
 131         points = self.checkmaxcolumns(points, maxcolumns)
 132         if addlinenumbers:
 133             for i in xrange(len(points)):
 134                 try:
 135                     points[i].insert(0, i+1)
 136                 except:
 137                     points[i] = [i+1] + __builtins__.list(points[i])
 138         self.points = points
 139         self.columns = columns
 140         self.title = title
 141
 142
 143 ##############################################################
 144 # math tree enhanced by column handling
 145 ##############################################################
 146
 147 class MathTreeFuncCol(mathtree.MathTreeFunc1):
 148
 149     def __init__(self, *args):
 150         mathtree.MathTreeFunc1.__init__(self, "_column_", *args)
 151
 152     def VarList(self):
 153         # we misuse VarList here:
 154         # - instead of returning a string, we return this instance itself
 155         # - before calculating the expression, you must call ColumnNameAndNumber
 156         #   once (when limiting the context to external defined variables,
 157         #   otherwise you have to call it each time)
 158         return [self]
 159
 160     def ColumnNameAndNumber(_hidden_self, **args):
 161         number = int(_hidden_self.Args[0].Calc(**args))
 162         _hidden_self.varname = "_column_%i" % number
 163         return _hidden_self.varname, number
 164
 165     def __str__(self):
 166         return self.varname
 167
 168     def Calc(_hidden_self, **args):
 169         return args[_hidden_self.varname]
 170
 171 MathTreeFuncsWithCol = mathtree.DefaultMathTreeFuncs + [MathTreeFuncCol]
 172
 173
 174 class columntree:
 175
 176     def __init__(self, tree):
 177         self.tree = tree
 178         self.Calc = tree.Calc
 179         self.__str__ = tree.__str__
 180
 181     def VarList(self):
 182         # returns a list of regular variables (strings) like the original mathtree
 183         return [var for var in self.tree.VarList() if not isinstance(var, MathTreeFuncCol) and var[:8] != "_column_"]
 184
 185     def columndict(_hidden_self, **context):
 186         # returns a dictionary of column names (keys) and column numbers (values)
 187         columndict = {}
 188         for var in _hidden_self.tree.VarList():
 189             if isinstance(var, MathTreeFuncCol):
 190                 name, number = var.ColumnNameAndNumber(**context)
 191                 columndict[name] = number
 192             elif var[:8] == "_column_":
 193                 columndict[var] = int(var[8:])
 194         return columndict
 195
 196
 197 class dataparser(mathtree.parser):
 198     # mathtree parser enhanced by column handling
 199     # parse returns a columntree instead of a regular tree
 200
 201     def __init__(self, MathTreeFuncs=MathTreeFuncsWithCol, **kwargs):
 202         mathtree.parser.__init__(self, MathTreeFuncs=MathTreeFuncs, **kwargs)
 203
 204     def parse(self, expr):
 205         return columntree(mathtree.parser.parse(self, expr.replace("$", "_column_")))
 206
 207 ##############################################################
 208
 209
 210 class copycolumn:
 211     # a helper storage class to mark a new column to copied
 212     # out of data from an old column
 213     def __init__(self, newcolumntitle, oldcolumnnumber):
 214         self.newcolumntitle = newcolumntitle
 215         self.oldcolumnnumber = oldcolumnnumber
 216
 217 class mathcolumn:
 218     """a helper storage class to mark a new column to created
 219     by evaluating a mathematical expression"""
 220     def __init__(self, newcolumntitle, expression, tree, varitems):
 221         # - expression is a string
 222         # - tree is a parsed mathematical tree, e.g. we can have
 223         #   call tree.Calc(**vars), where the dict vars maps variable
 224         #   names to values
 225         # - varitems is a list of (key, value) pairs, where the key
 226         #   stands is a variable name in the mathematical tree and
 227         #   the value is its value"""
 228         self.newcolumntitle = newcolumntitle
 229         self.expression = expression
 230         self.tree = tree
 231         self.varitems = varitems
 232
 233 class notitle:
 234     """this is a helper class to mark, that no title was privided
 235     (since a title equals None is a valid input, it needs to be
 236     distinguished from providing no title when a title will be
 237     created automatically)"""
 238     pass
 239
 240 class data(_data):
 241     "creates a new data set out of an existing data set"
 242
 243     def __init__(self, data, title=notitle, parser=dataparser(), context={}, **columns):
 244         defaultstyle = data.defaultstyle
 245
 246         # build a nice title
 247         if title is notitle:
 248             items = columns.items()
 249             items.sort() # we want sorted items (otherwise they would be unpredictable scrambled)
 250             self.title = data.title + ": " + ", ".join(["%s=%s" % item for item in items])
 251         else:
 252             self.title = title
 253
 254         # analyse the **columns argument
 255         newcolumns = []
 256         hasmathcolumns = 0
 257         for newcolumntitle, columnexpr in columns.items():
 258             try:
 259                 # try if it is a valid column identifier
 260                 oldcolumnnumber = data.getcolumnnumber(columnexpr)
 261             except:
 262                 # if not it should be a mathematical expression
 263                 tree = parser.parse(columnexpr)
 264                 columndict = tree.columndict(**context)
 265                 for var in tree.VarList():
 266                     try:
 267                         columndict[var] = data.getcolumnnumber(var)
 268                     except KeyError, e:
 269                         if var not in context.keys():
 270                             raise e
 271                 newcolumns.append(mathcolumn(newcolumntitle, columnexpr, tree, columndict.items()))
 272                 hasmathcolumns = 1
 273             else:
 274                 newcolumns.append(copycolumn(newcolumntitle, oldcolumnnumber))
 275
 276         # ensure to copy the zeroth column (line number)
 277         # if we already do, place it first again, otherwise add it to the front
 278         i = 0
 279         for newcolumn in newcolumns:
 280             if isinstance(newcolumn, copycolumn) and not newcolumn.oldcolumnnumber:
 281                 newcolumns.pop(i)
 282                 newcolumns.insert(0, newcolumn)
 283                 firstcolumnwithtitle = 0
 284                 break
 285             i += 1
 286         else:
 287             newcolumns.insert(0, copycolumn(None, 0))
 288             firstcolumnwithtitle = 1
 289
 290         if hasmathcolumns:
 291             # new column data needs to be calculated
 292             vars = context.copy() # do not modify context, use a copy vars instead
 293             self.points = [None]*len(data.points)
 294             countcolumns = len(newcolumns)
 295             for i in xrange(len(data.points)):
 296                 datapoint = data.points[i]
 297                 point = [None]*countcolumns
 298                 newcolumnnumber = 0
 299                 for newcolumn in newcolumns:
 300                     if isinstance(newcolumn, copycolumn):
 301                         point[newcolumnnumber] = datapoint[newcolumn.oldcolumnnumber]
 302                     else:
 303                         # update the vars
 304                         # TODO: we could update it once for all varitems
 305                         for newcolumntitle, value in newcolumn.varitems:
 306                             vars[newcolumntitle] = datapoint[value]
 307                         point[newcolumnnumber] = newcolumn.tree.Calc(**vars)
 308                         # we could also do:
 309                         # point[newcolumnnumber] = eval(str(newcolumn.tree), vars)
 310                     newcolumnnumber += 1
 311                 self.points[i] = point
 312
 313             # store the column titles
 314             self.columns = {}
 315             newcolumnnumber = firstcolumnwithtitle
 316             for newcolumn in newcolumns[firstcolumnwithtitle:]:
 317                 self.columns[newcolumn.newcolumntitle] = newcolumnnumber
 318                 newcolumnnumber += 1
 319         else:
 320             # since only column copies are needed, we can share the original points
 321             self.points = data.points
 322
 323             # store the new column titles
 324             self.columns = {}
 325             for newcolumn in newcolumns[firstcolumnwithtitle:]:
 326                 self.columns[newcolumn.newcolumntitle] = newcolumn.oldcolumnnumber
 327
 328
 329 filecache = {}
 330
 331 class file(data):
 332
 333     defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
 334     defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
 335     defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")
 336
 337     def splitline(self, line, stringpattern, columnpattern, tofloat=1):
 338         """returns a tuple created out of the string line
 339         - matches stringpattern and columnpattern, adds the first group of that
 340           match to the result and and removes those matches until the line is empty
 341         - when stringpattern matched, the result is always kept as a string
 342         - when columnpattern matched and tofloat is true, a conversion to a float
 343           is tried; when this conversion fails, the string is kept"""
 344         result = []
 345         # try to gain speed by skip matching regular expressions
 346         if line.find('"')!=-1 or \
 347            stringpattern is not self.defaultstringpattern or \
 348            columnpattern is not self.defaultcolumnpattern:
 349             while len(line):
 350                 match = stringpattern.match(line)
 351                 if match:
 352                     result.append(match.groups()[0])
 353                     line = line[match.end():]
 354                 else:
 355                     match = columnpattern.match(line)
 356                     if tofloat:
 357                         try:
 358                             result.append(float(match.groups()[0]))
 359                         except (TypeError, ValueError):
 360                             result.append(match.groups()[0])
 361                     else:
 362                         result.append(match.groups()[0])
 363                     line = line[match.end():]
 364         else:
 365             if tofloat:
 366                 try:
 367                     return map(float, line.split())
 368                 except (TypeError, ValueError):
 369                     result = []
 370                     for r in line.split():
 371                         try:
 372                             result.append(float(r))
 373                         except (TypeError, ValueError):
 374                             result.append(r)
 375             else:
 376                 return line.split()
 377         return result
 378
 379     def getcachekey(self, *args):
 380         return ":".join([str(x) for x in args])
 381
 382     def __init__(self, filename,
 383                        commentpattern=defaultcommentpattern,
 384                        stringpattern=defaultstringpattern,
 385                        columnpattern=defaultcolumnpattern,
 386                        skiphead=0, skiptail=0, every=1,
 387                        **kwargs):
 388         cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
 389         if not filecache.has_key(cachekey):
 390             file = open(filename)
 391             self.title = filename
 392             columns = {}
 393             points = []
 394             linenumber = 0
 395             maxcolumns = 0
 396             for line in file.readlines():
 397                 line = line.strip()
 398                 match = commentpattern.match(line)
 399                 if match:
 400                     if not len(points):
 401                         keys = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
 402                         i = 0
 403                         for key in keys:
 404                             i += 1
 405                             columns[key] = i
 406                 else:
 407                     linedata = []
 408                     for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
 409                         linedata.append(value)
 410                     if len(linedata):
 411                         if linenumber >= skiphead and not ((linenumber - skiphead) % every):
 412                             linedata = [linenumber + 1] + linedata
 413                             if len(linedata) > maxcolumns:
 414                                 maxcolumns = len(linedata)
 415                             points.append(linedata)
 416                         linenumber += 1
 417             if skiptail:
 418                 del points[-skiptail:]
 419             filecache[cachekey] = list(points, title=filename, maxcolumns=maxcolumns, addlinenumbers=0, **columns)
 420         data.__init__(self, filecache[cachekey], **kwargs)
 421
 422
 423 conffilecache = {}
 424
 425 class conffile(data):
 426
 427     def __init__(self, filename, **kwargs):
 428         """read data from a config-like file
 429         - filename is a string
 430         - each row is defined by a section in the config-like file (see
 431           config module description)
 432         - the columns for each row are defined by lines in the section file;
 433           the option entries identify and name the columns
 434         - further keyword arguments are passed to the constructor of data,
 435           keyword arguments data and titles excluded"""
 436         cachekey = filename
 437         if not filecache.has_key(cachekey):
 438             config = ConfigParser.ConfigParser()
 439             config.optionxform = str
 440             config.readfp(open(filename, "r"))
 441             sections = config.sections()
 442             sections.sort()
 443             points = [None]*len(sections)
 444             maxcolumns = 1
 445             columns = {}
 446             for i in xrange(len(sections)):
 447                 point = [sections[i]] + [None]*(maxcolumns-1)
 448                 for option in config.options(sections[i]):
 449                     value = config.get(sections[i], option)
 450                     try:
 451                         value = float(value)
 452                     except:
 453                         pass
 454                     try:
 455                         index = columns[option]
 456                     except KeyError:
 457                         columns[option] = maxcolumns
 458                         point.append(value)
 459                         maxcolumns += 1
 460                     else:
 461                         point[index] = value
 462                 points[i] = point
 463             conffilecache[cachekey] = list(points, title=filename, maxcolumns=maxcolumns, addlinenumbers=0, **columns)
 464         data.__init__(self, conffilecache[cachekey], **kwargs)
 465
 466
 467
 468 class function:
 469
 470     defaultstyle = style.line()
 471
 472     def __init__(self, expression, title=notitle, min=None, max=None,
 473     points=100, parser=mathtree.parser(), context={}):
 474
 475         if title is notitle:
 476             self.title = expression
 477         else:
 478             self.title = title
 479         self.min = min
 480         self.max = max
 481         self.numberofpoints = points
 482         self.context = context.copy() # be save on late evaluations
 483         self.result, expression = [x.strip() for x in expression.split("=")]
 484         self.mathtree = parser.parse(expression)
 485         self.variable = None
 486
 487     def setstyle(self, graph, style):
 488         self.style = style
 489         for variable in self.mathtree.VarList():
 490             if variable in graph.axes.keys():
 491                 if self.variable is None:
 492                     self.variable = variable
 493                 else:
 494                     raise ValueError("multiple variables found")
 495         if self.variable is None:
 496             raise ValueError("no variable found")
 497         self.xaxis = graph.axes[self.variable]
 498         self.columns = {self.variable: 1, self.result: 2}
 499         unhandledcolumns = self.style.setdata(graph, self.columns, self)
 500         unhandledcolumnkeys = unhandledcolumns.keys()
 501         if len(unhandledcolumnkeys):
 502             raise ValueError("style couldn't handle column keys %s" % unhandledcolumnkeys)
 503
 504     def selectstyle(self, graph, selectindex, selecttotal):
 505         self.style.selectstyle(selectindex, selecttotal, self)
 506
 507     def adjustaxes(self, graph, step):
 508         """
 509         - on step == 0 axes with fixed data should be adjusted
 510         - on step == 1 the current axes ranges might be used to
 511           calculate further data (e.g. y data for a function y=f(x)
 512           where the y range depends on the x range)
 513         - on step == 2 axes ranges not previously set should be
 514           updated by data accumulated by step 1"""
 515         if step == 0:
 516             self.points = []
 517             if self.min is not None:
 518                 self.points.append([None, self.min])
 519             if self.max is not None:
 520                 self.points.append([None, self.max])
 521             self.style.adjustaxes([1], self)
 522         elif step == 1:
 523             min, max = graph.axes[self.variable].getrange()
 524             if self.min is not None: min = self.min
 525             if self.max is not None: max = self.max
 526             vmin = self.xaxis.convert(min)
 527             vmax = self.xaxis.convert(max)
 528             self.points = []
 529             for i in range(self.numberofpoints):
 530                 v = vmin + (vmax-vmin)*i / (self.numberofpoints-1.0)
 531                 x = self.xaxis.invert(v)
 532                 # caution: the virtual coordinate might differ once
 533                 # the axis rescales itself to include further ticks etc.
 534                 self.points.append([v, x, None])
 535             for point in self.points:
 536                 self.context[self.variable] = point[1]
 537                 try:
 538                     point[2] = self.mathtree.Calc(**self.context)
 539                 except (ArithmeticError, ValueError):
 540                     pass
 541         elif step == 2:
 542             self.style.adjustaxes([2], self)
 543
 544     def draw(self, graph):
 545         self.style.drawpoints(graph, self)
 546
 547
 548 class paramfunction:
 549
 550     defaultstyle = style.line()
 551
 552     def __init__(self, varname, min, max, expression, title=notitle, points=100, parser=mathtree.parser(), context={}):
 553         if title is notitle:
 554             self.title = expression
 555         else:
 556             self.title = title
 557         self.varname = varname
 558         self.min = min
 559         self.max = max
 560         self.numberofpoints = points
 561         self.expression = {}
 562         varlist, expressionlist = expression.split("=")
 563         keys = varlist.split(",")
 564         mathtrees = parser.parse(expressionlist)
 565         if len(keys) != len(mathtrees):
 566             raise ValueError("unpack tuple of wrong size")
 567         self.points = [None]*self.numberofpoints
 568         emptyresult = [None]*len(keys)
 569         self.columns = {}
 570         i = 1
 571         for key in keys:
 572             self.columns[key.strip()] = i
 573             i += 1
 574         for i in range(self.numberofpoints):
 575             param = self.min + (self.max-self.min)*i / (self.numberofpoints-1.0)
 576             context[self.varname] = param
 577             self.points[i] = [param] + emptyresult
 578             column = 1
 579             for key, column in self.columns.items():
 580                 self.points[i][column] = mathtrees[column-1].Calc(**context)
 581                 column += 1
 582
 583     def setstyle(self, graph, style):
 584         self.style = style
 585         unhandledcolumns = self.style.setdata(graph, self.columns, self)
 586         unhandledcolumnkeys = unhandledcolumns.keys()
 587         if len(unhandledcolumnkeys):
 588             raise ValueError("style couldn't handle column keys %s" % unhandledcolumnkeys)
 589
 590     def selectstyle(self, graph, selectindex, selecttotal):
 591         self.style.selectstyle(selectindex, selecttotal, self)
 592
 593     def adjustaxes(self, graph, step):
 594         if step == 0:
 595             self.style.adjustaxes(self.columns.values(), self)
 596
 597     def draw(self, graph):
 598         self.style.drawpoints(graph, self)
 599