pyx/graph/data.py

   1 #!/usr/bin/env python
   2 # -*- coding: ISO-8859-1 -*-
   3 #
   4 #
   5 # Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
   6 # Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
   7 # Copyright (C) 2002-2004 André Wobst <wobsta@users.sourceforge.net>
   8 #
   9 # This file is part of PyX (http://pyx.sourceforge.net/).
  10 #
  11 # PyX is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15 #
  16 # PyX is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20 #
  21 # You should have received a copy of the GNU General Public License
  22 # along with PyX; if not, write to the Free Software
  23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24
  25
  26 import re, ConfigParser
  27 from pyx import mathtree
  28 from pyx.graph import style
  29
  30
  31 class _Idata:
  32     """Interface for graph data
  33
  34     Graph data consists in columns, where each column might
  35     be identified by a string or an integer. Each row in the
  36     resulting table refers to a data point."""
  37
  38     def getcolumndataindex(self, column):
  39         """Data for a column
  40
  41         This method returns data of a column by a tuple data, index.
  42         column identifies the column. If index is not None, the data
  43         of the column is found at position index for each element of
  44         the list data. If index is None, the data is the list of
  45         data."""
  46
  47     def getcolumn(self, column):
  48         """Data for a column
  49
  50         This method returns the data of a column in a list. column
  51         has the same meaning as in getcolumndataindex. Note, that
  52         this method typically has to create this list, which needs
  53         time and memory. While its easy to the user, internally it
  54         should be avoided in favor of getcolumndataindex. The method
  55         can be implemented as follows:"""
  56         data, index = self.getcolumndataindex(column)
  57         if index is None:
  58             return data
  59         else:
  60             return [point[index] for point in data]
  61
  62     def getcount(self):
  63         """Number of points
  64
  65         This method returns the number of points. All results by
  66         getcolumndataindex and getcolumn will fit this number."""
  67
  68     def getdefaultstyles(self):
  69         """Default styles for the data
  70
  71         Returns a list of default styles for the data. Note to
  72         return the same instances when the graph should iterate
  73         over the styles using selectstyles. The following default
  74         implementation returns the value of the defaultstyles
  75         class variable."""
  76
  77     def gettitle(self):
  78         """Title of the data
  79
  80         This method returns a title string for the data to be used
  81         in graph keys and probably other locations. The method might
  82         return None to indicate, that there is no title and the data
  83         should be skiped in a graph key. Alternatively, the title
  84         might contain a list of strings. The list should fit the
  85         return value of the key_pt method. Data titles does not need
  86         to be unique."""
  87
  88     def setstyles(self, graph, styles):
  89         """Attach graph styles to data
  90
  91         This method is called by the graph to attach styles to the
  92         data instance."""
  93
  94     def selectstyles(self, graph, selectindex, selecttotal):
  95         """Perform select on the styles
  96
  97         This method should perfrom selectstyle calls on all styles."""
  98         for style in self.styles:
  99             style.selectstyle(self.styledata, graph, selectindex, selecttotal)
 100
 101     def adjustaxes(self, graph, step):
 102         """Adjust axes ranges
 103
 104         This method should call adjustaxis for all styles.
 105         On step == 0 axes with fixed data should be adjusted.
 106         On step == 1 the current axes ranges might be used to
 107         calculate further data (e.g. y data for a function y=f(x)
 108         where the y range depends on the x range). On step == 2
 109         axes ranges not previously set should be updated by data
 110         accumulated by step 1."""
 111
 112     def draw(self, graph):
 113         """Draw data
 114
 115         This method should draw the data."""
 116
 117     def key_pt(self, graph, x_pt, y_pt, width_pt, height_pt, dy_pt):
 118         """Draw graph key
 119
 120         This method should draw a graph key at the given position
 121         x_pt, y_pt indicating the lower left corner of the given
 122         area width_pt, height_pt. The styles might draw several
 123         key entries shifted vertically by dy_pt. The method returns
 124         the number of key entries."""
 125
 126
 127 class styledata:
 128     """Styledata storage class
 129
 130     Instances of this class are used to store data from the styles
 131     and to pass point data to the styles.  is shared
 132     between all the style(s) in use by a data instance"""
 133     pass
 134
 135
 136 class _data(_Idata):
 137     """Partly implements the _Idata interface
 138
 139     This class partly implements the _Idata interface. In order
 140     to do so, it makes use of various instance variables:
 141
 142         self.data:
 143         self.columns:
 144         self.styles:
 145         self.styledata:
 146         self.title: the title of the data
 147         self.defaultstyles:"""
 148
 149     defaultstyles = [style.symbol()]
 150
 151     def getcolumndataindex(self, column):
 152         return self.data, self.columns[column]
 153
 154     def getcount(self):
 155         return len(self.data)
 156
 157     def gettitle(self):
 158         return self.title
 159
 160     def getdefaultstyles(self):
 161         return self.defaultstyles
 162
 163     def addneededstyles(self, styles):
 164         """helper method (not part of the interface)
 165
 166         This is a helper method, which returns a list of styles where
 167         provider styles are added in front to fullfill all needs of the
 168         given styles."""
 169         provided = [] # already provided styledata variables
 170         addstyles = [] # a list of style instances to be added in front
 171         for s in styles:
 172             for n in s.need:
 173                 if n not in provided:
 174                     addstyles.append(style.provider[n])
 175                     provided.extend(style.provider[n].provide)
 176             provided.extend(s.provide)
 177         return addstyles + styles
 178
 179     def setcolumns(self, styledata, graph, styles, columns):
 180         """helper method (not part of the interface)
 181
 182         This is a helper method to perform setcolumn to all styles."""
 183         usedcolumns = []
 184         for style in styles:
 185             usedcolumns.extend(style.columns(self.styledata, graph, columns))
 186         for column in columns:
 187             if column not in usedcolumns:
 188                 raise ValueError("unused column '%s'" % column)
 189
 190     def setstyles(self, graph, styles):
 191         self.styledata = styledata()
 192         self.styles = self.addneededstyles(styles)
 193         self.setcolumns(self.styledata, graph, self.styles, self.columns.keys())
 194
 195     def selectstyles(self, graph, selectindex, selecttotal):
 196         for style in self.styles:
 197             style.selectstyle(self.styledata, graph, selectindex, selecttotal)
 198
 199     def adjustaxes(self, graph, step):
 200         if step == 0:
 201             for column in self.columns.keys():
 202                 data, index = self.getcolumndataindex(column)
 203                 for style in self.styles:
 204                     style.adjustaxis(self.styledata, graph, column, data, index)
 205
 206     def draw(self, graph):
 207         columndataindex = []
 208         for column in self.columns.keys():
 209             data, index = self.getcolumndataindex(column)
 210             columndataindex.append((column, data, index))
 211         if len(columndataindex):
 212             column, data, index = columndataindex[0]
 213             l = len(data)
 214             for column, data, index in columndataindex[1:]:
 215                 if l != len(data):
 216                     raise ValueError("data len differs")
 217             self.styledata.point = {}
 218             for style in self.styles:
 219                 style.initdrawpoints(self.styledata, graph)
 220             for i in xrange(l):
 221                 for column, data, index in columndataindex:
 222                     if index is not None:
 223                         self.styledata.point[column] = data[i][index]
 224                     else:
 225                         self.styledata.point[column] = data[i]
 226                 for style in self.styles:
 227                     style.drawpoint(self.styledata, graph)
 228             for style in self.styles:
 229                 style.donedrawpoints(self.styledata, graph)
 230
 231     def key_pt(self, graph, x_pt, y_pt, width_pt, height_pt, dy_pt):
 232         i = None
 233         for style in self.styles:
 234             j = style.key_pt(self.styledata, graph, x_pt, y_pt, width_pt, height_pt)
 235             if i is None:
 236                 if j is not None:
 237                     i = j
 238             elif j is not None and i != j:
 239                 raise ValueError("different number of graph keys")
 240         if i is None:
 241             raise ValueError("no graph key available")
 242         return i
 243
 244
 245 class list(_data):
 246     "Graph data from a list of points"
 247
 248     def getcolumndataindex(self, column):
 249         try:
 250             if self.addlinenumbers:
 251                 index = self.columns[column]-1
 252             else:
 253                 index = self.columns[column]
 254         except KeyError:
 255             try:
 256                 if type(column) != type(column + 0):
 257                     raise ValueError("integer expected")
 258             except:
 259                 raise ValueError("integer expected")
 260             if self.addlinenumbers:
 261                 if column > 0:
 262                     index = column-1
 263                 elif column < 0:
 264                     index = column
 265                 else:
 266                     return range(1, 1+len(self.data)), None
 267             else:
 268                 index = column
 269         return self.data, index
 270
 271     def __init__(self, data, title="user provided list", addlinenumbers=1, **columns):
 272         if len(data):
 273             # be paranoid and check each row to have the same number of data
 274             l = len(data[0])
 275             for p in data[1:]:
 276                 if l != len(p):
 277                     raise ValueError("different number of columns per point")
 278             for v in columns.values():
 279                 if abs(v) > l or (not addlinenumbers and abs(v) == l):
 280                     raise ValueError("column number bigger than number of columns")
 281         self.data = data
 282         self.columns = columns
 283         self.title = title
 284         self.addlinenumbers = addlinenumbers
 285
 286
 287 ##############################################################
 288 # math tree enhanced by column number variables
 289 ##############################################################
 290
 291 class MathTreeFuncCol(mathtree.MathTreeFunc1):
 292
 293     def __init__(self, *args):
 294         mathtree.MathTreeFunc1.__init__(self, "_column_", *args)
 295
 296     def VarList(self):
 297         # we misuse VarList here:
 298         # - instead of returning a string, we return this instance itself
 299         # - before calculating the expression, you must call ColumnNameAndNumber
 300         #   once (when limiting the context to external defined variables,
 301         #   otherwise you have to call it each time)
 302         return [self]
 303
 304     def ColumnNameAndNumber(_hidden_self, **args):
 305         number = int(_hidden_self.Args[0].Calc(**args))
 306         _hidden_self.varname = "_column_%i" % number
 307         return _hidden_self.varname, number
 308
 309     def __str__(self):
 310         return self.varname
 311
 312     def Calc(_hidden_self, **args):
 313         return args[_hidden_self.varname]
 314
 315 MathTreeFuncsWithCol = mathtree.DefaultMathTreeFuncs + [MathTreeFuncCol]
 316
 317
 318 class columntree:
 319
 320     def __init__(self, tree):
 321         self.tree = tree
 322         self.Calc = tree.Calc
 323         self.__str__ = tree.__str__
 324
 325     def VarList(self):
 326         # returns a list of regular variables (strings) like the original mathtree
 327         return [var for var in self.tree.VarList() if not isinstance(var, MathTreeFuncCol) and var[:8] != "_column_"]
 328
 329     def columndict(_hidden_self, **context):
 330         # returns a dictionary of column names (keys) and column numbers (values)
 331         columndict = {}
 332         for var in _hidden_self.tree.VarList():
 333             if isinstance(var, MathTreeFuncCol):
 334                 name, number = var.ColumnNameAndNumber(**context)
 335                 columndict[name] = number
 336             elif var[:8] == "_column_":
 337                 columndict[var] = int(var[8:])
 338         return columndict
 339
 340
 341 class dataparser(mathtree.parser):
 342     # mathtree parser enhanced by column handling
 343     # parse returns a columntree instead of a regular tree
 344
 345     def __init__(self, MathTreeFuncs=MathTreeFuncsWithCol, **kwargs):
 346         mathtree.parser.__init__(self, MathTreeFuncs=MathTreeFuncs, **kwargs)
 347
 348     def parse(self, expr):
 349         return columntree(mathtree.parser.parse(self, expr.replace("$", "_column_")))
 350
 351 ##############################################################
 352
 353
 354 class _notitle:
 355     """this is a helper class to mark, that no title was privided
 356     (since a title equals None is a valid input, it needs to be
 357     distinguished from providing no title when a title will be
 358     created automatically)"""
 359     pass
 360
 361 class data(_data):
 362     "creates a new data set out of an existing data set"
 363
 364     def __init__(self, data, title=_notitle, parser=dataparser(), context={}, **columns):
 365         # build a nice title
 366         if title is _notitle:
 367             items = columns.items()
 368             items.sort() # we want sorted items (otherwise they would be unpredictable scrambled)
 369             self.title = data.title + ": " + ", ".join(["%s=%s" % item for item in items])
 370         else:
 371             self.title = title
 372
 373         self.orgdata = data
 374
 375         # analyse the **columns argument
 376         self.columns = {}
 377         newcolumns = {}
 378         for column, value in columns.items():
 379             try:
 380                 # try if it is a valid column identifier
 381                 self.columns[column] = self.orgdata.getcolumndataindex(value)
 382             except (KeyError, ValueError):
 383                 # take it as a mathematical expression
 384                 tree = parser.parse(value)
 385                 columndict = tree.columndict(**context)
 386                 vardataindex = []
 387                 for var, value in columndict.items():
 388                     # column data accessed via $<column number>
 389                     data, index = self.orgdata.getcolumndataindex(value)
 390                     vardataindex.append((var, data, index))
 391                 for var in tree.VarList():
 392                     try:
 393                         # column data accessed via the name of the column
 394                         data, index = self.orgdata.getcolumndataindex(var)
 395                     except (KeyError, ValueError):
 396                         # other data available in context
 397                         if var not in context.keys():
 398                             raise ValueError("undefined variable '%s'" % var)
 399                     else:
 400                         vardataindex.append((var, data, index))
 401                 newdata = [None]*self.getcount()
 402                 vars = context.copy() # do not modify context, use a copy vars instead
 403                 for i in xrange(self.getcount()):
 404                     # insert column data as prepared in vardataindex
 405                     for var, data, index in vardataindex:
 406                         if index is not None:
 407                             vars[var] = data[i][index]
 408                         else:
 409                             vars[var] = data[i]
 410                     # evaluate expression
 411                     newdata[i] = tree.Calc(**vars)
 412                     # we could also do:
 413                     # point[newcolumnnumber] = eval(str(tree), vars)
 414
 415                     # XXX: It might happen, that the evaluation of the expression
 416                     #      seems to work, but the result is NaN/Inf/-Inf. This
 417                     #      is highly plattform dependend.
 418
 419                 self.columns[column] = newdata, None
 420
 421     def getcolumndataindex(self, column):
 422         return self.columns[column]
 423
 424     def getcount(self):
 425         return self.orgdata.getcount()
 426
 427     def getdefaultstyle(self):
 428         return self.orgdata.getdefaultstyle()
 429
 430
 431 filecache = {}
 432
 433 class file(data):
 434
 435     defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
 436     defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
 437     defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")
 438
 439     def splitline(self, line, stringpattern, columnpattern, tofloat=1):
 440         """returns a tuple created out of the string line
 441         - matches stringpattern and columnpattern, adds the first group of that
 442           match to the result and and removes those matches until the line is empty
 443         - when stringpattern matched, the result is always kept as a string
 444         - when columnpattern matched and tofloat is true, a conversion to a float
 445           is tried; when this conversion fails, the string is kept"""
 446         result = []
 447         # try to gain speed by skip matching regular expressions
 448         if line.find('"')!=-1 or \
 449            stringpattern is not self.defaultstringpattern or \
 450            columnpattern is not self.defaultcolumnpattern:
 451             while len(line):
 452                 match = stringpattern.match(line)
 453                 if match:
 454                     result.append(match.groups()[0])
 455                     line = line[match.end():]
 456                 else:
 457                     match = columnpattern.match(line)
 458                     if tofloat:
 459                         try:
 460                             result.append(float(match.groups()[0]))
 461                         except (TypeError, ValueError):
 462                             result.append(match.groups()[0])
 463                     else:
 464                         result.append(match.groups()[0])
 465                     line = line[match.end():]
 466         else:
 467             if tofloat:
 468                 try:
 469                     return map(float, line.split())
 470                 except (TypeError, ValueError):
 471                     result = []
 472                     for r in line.split():
 473                         try:
 474                             result.append(float(r))
 475                         except (TypeError, ValueError):
 476                             result.append(r)
 477             else:
 478                 return line.split()
 479         return result
 480
 481     def getcachekey(self, *args):
 482         return ":".join([str(x) for x in args])
 483
 484     def __init__(self, filename,
 485                        commentpattern=defaultcommentpattern,
 486                        stringpattern=defaultstringpattern,
 487                        columnpattern=defaultcolumnpattern,
 488                        skiphead=0, skiptail=0, every=1,
 489                        **kwargs):
 490
 491         def readfile(file, title):
 492             columns = {}
 493             points = []
 494             linenumber = 0
 495             maxcolumns = 0
 496             for line in file.readlines():
 497                 line = line.strip()
 498                 match = commentpattern.match(line)
 499                 if match:
 500                     if not len(points):
 501                         keys = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
 502                         i = 0
 503                         for key in keys:
 504                             i += 1 # the first column is number 1 since a linenumber is added in front
 505                             columns[key] = i
 506                 else:
 507                     linedata = []
 508                     for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
 509                         linedata.append(value)
 510                     if len(linedata):
 511                         if linenumber >= skiphead and not ((linenumber - skiphead) % every):
 512                             linedata = [linenumber + 1] + linedata
 513                             if len(linedata) > maxcolumns:
 514                                 maxcolumns = len(linedata)
 515                             points.append(linedata)
 516                         linenumber += 1
 517             if skiptail >= every:
 518                 skip, x = divmod(skiptail, every)
 519                 del points[-skip:]
 520             for i in xrange(len(points)):
 521                 if len(points[i]) != maxcolumns:
 522                     points[i].extend([None]*(maxcolumns-len(points[i])))
 523             return list(points, title=title, addlinenumbers=0, **columns)
 524
 525         try:
 526             filename.readlines
 527         except:
 528             # not a file-like object -> open it
 529             cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
 530             if not filecache.has_key(cachekey):
 531                 filecache[cachekey] = readfile(open(filename), filename)
 532             data.__init__(self, filecache[cachekey], **kwargs)
 533         else:
 534             data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
 535
 536
 537 conffilecache = {}
 538
 539 class conffile(data):
 540
 541     def __init__(self, filename, **kwargs):
 542         """read data from a config-like file
 543         - filename is a string
 544         - each row is defined by a section in the config-like file (see
 545           config module description)
 546         - the columns for each row are defined by lines in the section file;
 547           the option entries identify and name the columns
 548         - further keyword arguments are passed to the constructor of data,
 549           keyword arguments data and titles excluded"""
 550
 551         def readfile(file, title):
 552             config = ConfigParser.ConfigParser()
 553             config.optionxform = str
 554             config.readfp(file)
 555             sections = config.sections()
 556             sections.sort()
 557             points = [None]*len(sections)
 558             maxcolumns = 1
 559             columns = {}
 560             for i in xrange(len(sections)):
 561                 point = [sections[i]] + [None]*(maxcolumns-1)
 562                 for option in config.options(sections[i]):
 563                     value = config.get(sections[i], option)
 564                     try:
 565                         value = float(value)
 566                     except:
 567                         pass
 568                     try:
 569                         index = columns[option]
 570                     except KeyError:
 571                         columns[option] = maxcolumns
 572                         point.append(value)
 573                         maxcolumns += 1
 574                     else:
 575                         point[index] = value
 576                 points[i] = point
 577             return list(points, title=title, addlinenumbers=0, **columns)
 578
 579         try:
 580             filename.readlines
 581         except:
 582             # not a file-like object -> open it
 583             if not filecache.has_key(filename):
 584                 filecache[filename] = readfile(open(filename), filename)
 585             data.__init__(self, filecache[filename], **kwargs)
 586         else:
 587             data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
 588
 589
 590 class _linedata(_data):
 591
 592     defaultstyles = [style.line()]
 593
 594
 595 class function(_linedata):
 596
 597     def __init__(self, expression, title=_notitle, min=None, max=None,
 598                  points=100, parser=mathtree.parser(), context={}):
 599
 600         if title is _notitle:
 601             self.title = expression
 602         else:
 603             self.title = title
 604         self.min = min
 605         self.max = max
 606         self.numberofpoints = points
 607         self.context = context.copy() # be save on late evaluations
 608         self.yname, expression = [x.strip() for x in expression.split("=")]
 609         self.mathtree = parser.parse(expression)
 610
 611     def setstyles(self, graph, styles):
 612         self.xname = None
 613         for xname in self.mathtree.VarList():
 614             if xname in graph.axes.keys():
 615                 if self.xname is None:
 616                     self.xname = xname
 617                 else:
 618                     raise ValueError("multiple variables found")
 619         if self.xname is None:
 620             raise ValueError("no variable found")
 621         self.columns = {self.xname: 0, self.yname: 1}
 622         _linedata.setstyles(self, graph, styles)
 623
 624     def adjustaxes(self, graph, step):
 625         if step == 0:
 626             data = []
 627             if self.min is not None:
 628                 self.points.append(self.min)
 629             if self.max is not None:
 630                 self.points.append(self.max)
 631             for style in self.styles:
 632                 style.adjustaxis(self.styledata, graph, self.xname, data, None)
 633         elif step == 1:
 634             xaxis = graph.axes[self.xname]
 635             min, max = xaxis.getrange()
 636             if self.min is not None: min = self.min
 637             if self.max is not None: max = self.max
 638             vmin = xaxis.convert(min)
 639             vmax = xaxis.convert(max)
 640             self.data = []
 641             for i in range(self.numberofpoints):
 642                 v = vmin + (vmax-vmin)*i / (self.numberofpoints-1.0)
 643                 x = xaxis.invert(v)
 644                 self.context[self.xname] = x
 645                 try:
 646                     y = self.mathtree.Calc(**self.context)
 647                 except (ArithmeticError, ValueError):
 648                     y = None
 649                 self.data.append([x, y])
 650         elif step == 2:
 651             for style in self.styles:
 652                 style.adjustaxis(self.styledata, graph, self.yname, self.data, 1)
 653
 654
 655 class paramfunction(_linedata):
 656
 657     def __init__(self, varname, min, max, expression, title=_notitle, points=100, parser=mathtree.parser(), context={}):
 658         if title is _notitle:
 659             self.title = expression
 660         else:
 661             self.title = title
 662         varlist, expressionlist = expression.split("=")
 663         keys = varlist.split(",")
 664         mathtrees = parser.parse(expressionlist)
 665         if len(keys) != len(mathtrees):
 666             raise ValueError("unpack tuple of wrong size")
 667         l = len(keys)
 668         self.data = [None]*points
 669         self.columns = {}
 670         for index, key in enumerate(keys):
 671             self.columns[key.strip()] = index
 672         for i in range(points):
 673             param = min + (max-min)*i / (points-1.0)
 674             context[varname] = param
 675             self.data[i] = [None]*l
 676             for index, mathtree in enumerate(mathtrees):
 677                 self.data[i][index] = mathtree.Calc(**context)
 678