pyx/graph/data.py

   1 #!/usr/bin/env python
   2 # -*- coding: ISO-8859-1 -*-
   3 #
   4 #
   5 # Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
   6 # Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
   7 # Copyright (C) 2002-2005 André Wobst <wobsta@users.sourceforge.net>
   8 #
   9 # This file is part of PyX (http://pyx.sourceforge.net/).
  10 #
  11 # PyX is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15 #
  16 # PyX is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20 #
  21 # You should have received a copy of the GNU General Public License
  22 # along with PyX; if not, write to the Free Software
  23 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
  24
  25 from __future__ import nested_scopes
  26
  27 import math, re, ConfigParser, struct, warnings
  28 from pyx import text
  29 from pyx.style import linestyle
  30 from pyx.graph import style
  31
  32 try:
  33     enumerate([])
  34 except NameError:
  35     # fallback implementation for Python 2.2 and below
  36     def enumerate(list):
  37         return zip(xrange(len(list)), list)
  38
  39 try:
  40     dict()
  41 except NameError:
  42     # fallback implementation for Python 2.1
  43     def dict(items):
  44         result = {}
  45         for key, value in items:
  46             result[key] = value
  47         return result
  48
  49
  50 def splitatvalue(value, *splitpoints):
  51     section = 0
  52     while section < len(splitpoints) and splitpoints[section] < value:
  53         section += 1
  54     if len(splitpoints) > 1:
  55         if section % 2:
  56             section = None
  57         else:
  58             section >>= 1
  59     return (section, value)
  60
  61
  62 _mathglobals = {"neg": lambda x: -x,
  63                 "abs": lambda x: x < 0 and -x or x,
  64                 "sgn": lambda x: x < 0 and -1 or 1,
  65                 "sqrt": math.sqrt,
  66                 "exp": math.exp,
  67                 "log": math.log,
  68                 "sin": math.sin,
  69                 "cos": math.cos,
  70                 "tan": math.tan,
  71                 "asin": math.asin,
  72                 "acos": math.acos,
  73                 "atan": math.atan,
  74                 "sind": lambda x: math.sin(math.pi/180*x),
  75                 "cosd": lambda x: math.cos(math.pi/180*x),
  76                 "tand": lambda x: math.tan(math.pi/180*x),
  77                 "asind": lambda x: 180/math.pi*math.asin(x),
  78                 "acosd": lambda x: 180/math.pi*math.acos(x),
  79                 "atand": lambda x: 180/math.pi*math.atan(x),
  80                 "norm": lambda x, y: math.hypot(x, y),
  81                 "splitatvalue": splitatvalue,
  82                 "pi": math.pi,
  83                 "e": math.e}
  84
  85
  86 class _data:
  87     """graph data interface
  88
  89     Graph data consists in columns, where each column might be identified by a
  90     string or an integer. Each row in the resulting table refers to a data
  91     point.
  92
  93     All methods except for the constructor should consider self and its
  94     attributes to be readonly, since the data instance might be shared between
  95     several graphs simultaniously.
  96
  97     The instance variable columns is a dictionary mapping column names to the
  98     data of the column (i.e. to a list). Only static columns (known at
  99     construction time) are contained in that dictionary. For data with numbered
 100     columns the column data is also available via the list columndata.
 101     Otherwise the columndata list should be missing and an access to a column
 102     number will fail.
 103
 104     The names of all columns (static and dynamic) must be fixed at the constructor
 105     and stated in the columnnames dictionary.
 106
 107     The instance variable title and defaultstyles contain the data title and
 108     the default styles (a list of styles), respectively.
 109     """
 110
 111     def dynamiccolumns(self, graph):
 112         """create and return dynamic columns data
 113
 114         Returns dynamic data matching the given axes (the axes range and other
 115         data might be used). The return value is a dictionary similar to the
 116         columns instance variable.
 117         """
 118         return {}
 119
 120
 121 class list(_data):
 122     "Graph data from a list of points"
 123
 124     defaultstyles = [style.symbol()]
 125
 126     def __init__(self, points, title="user provided list", addlinenumbers=1, **columns):
 127         if len(points):
 128             l = len(points[0])
 129             self.columndata = [[x] for x in points[0]]
 130             for point in points[1:]:
 131                 if l != len(point):
 132                     raise ValueError("different number of columns per point")
 133                 for i, x in enumerate(point):
 134                     self.columndata[i].append(x)
 135             for v in columns.values():
 136                 if abs(v) > l or (not addlinenumbers and abs(v) == l):
 137                     raise ValueError("column number bigger than number of columns")
 138             if addlinenumbers:
 139                 self.columndata = [range(1, len(points) + 1)] + self.columndata
 140             self.columns = dict([(key, self.columndata[i]) for key, i in columns.items()])
 141         else:
 142             self.columns = dict([(key, []) for key, i in columns])
 143         self.columnnames = self.columns.keys()
 144         self.title = title
 145
 146
 147 class _notitle:
 148     pass
 149
 150 _columnintref = re.compile(r"\$(-?\d+)", re.IGNORECASE)
 151
 152 class data(_data):
 153     "creates a new data set out of an existing data set"
 154
 155     def __init__(self, data, title=_notitle, context={}, copy=1,
 156                        replacedollar=1, columncallback="__column__", **columns):
 157         # build a nice title
 158         if title is _notitle:
 159             items = columns.items()
 160             items.sort() # we want sorted items (otherwise they would be unpredictable scrambled)
 161             self.title = "%s: %s" % (text.escapestring(data.title or "unkown source"),
 162                                      ", ".join(["%s=%s" % (text.escapestring(key),
 163                                                            text.escapestring(str(value)))
 164                                                 for key, value in items]))
 165         else:
 166             self.title = title
 167
 168         self.orgdata = data
 169         self.defaultstyles = self.orgdata.defaultstyles
 170
 171         # analyse the **columns argument
 172         self.columns = {}
 173         for columnname, value in columns.items():
 174             # search in the columns dictionary
 175             try:
 176                 self.columns[columnname] = self.orgdata.columns[value]
 177             except KeyError:
 178                 # search in the columndata list
 179                 try:
 180                     self.columns[columnname] = self.orgdata.columndata[value]
 181                 except (AttributeError, TypeError):
 182                     # value was not an valid column identifier
 183                     # i.e. take it as a mathematical expression
 184                     if replacedollar:
 185                         m = _columnintref.search(value)
 186                         while m:
 187                             value = "%s%s(%s)%s" % (value[:m.start()], columncallback, m.groups()[0], value[m.end():])
 188                             m = _columnintref.search(value)
 189                         value = value.replace("$", columncallback)
 190                     expression = compile(value.strip(), __file__, "eval")
 191                     context = context.copy()
 192                     context[columncallback] = self.columncallback
 193                     if self.orgdata.columns:
 194                         key, columndata = self.orgdata.columns.items()[0]
 195                         count = len(columndata)
 196                     elif self.orgdata.columndata:
 197                         count = len(self.orgdata.columndata[0])
 198                     else:
 199                         count = 0
 200                     newdata = []
 201                     for i in xrange(count):
 202                         self.columncallbackcount = i
 203                         for key, values in self.orgdata.columns.items():
 204                             context[key] = values[i]
 205                         try:
 206                             newdata.append(eval(expression, _mathglobals, context))
 207                         except (ArithmeticError, ValueError):
 208                             newdata.append(None)
 209                     self.columns[columnname] = newdata
 210
 211         if copy:
 212             # copy other, non-conflicting column names
 213             for columnname, columndata in self.orgdata.columns.items():
 214                 if not self.columns.has_key(columnname):
 215                     self.columns[columnname] = columndata
 216
 217         self.columnnames = self.columns.keys()
 218
 219     def columncallback(self, value):
 220         try:
 221             return self.orgdata.columndata[value][self.columncallbackcount]
 222         except:
 223             return self.orgdata.columns[value][self.columncallbackcount]
 224
 225
# module-level cache of data read from files, so that a file needs to be
# parsed only once per cache key
filecache = {}
 227
 228 class file(data):
 229
 230     defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
 231     defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
 232     defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")
 233
 234     def splitline(self, line, stringpattern, columnpattern, tofloat=1):
 235         """returns a tuple created out of the string line
 236         - matches stringpattern and columnpattern, adds the first group of that
 237           match to the result and and removes those matches until the line is empty
 238         - when stringpattern matched, the result is always kept as a string
 239         - when columnpattern matched and tofloat is true, a conversion to a float
 240           is tried; when this conversion fails, the string is kept"""
 241         result = []
 242         # try to gain speed by skip matching regular expressions
 243         if line.find('"')!=-1 or \
 244            stringpattern is not self.defaultstringpattern or \
 245            columnpattern is not self.defaultcolumnpattern:
 246             while len(line):
 247                 match = stringpattern.match(line)
 248                 if match:
 249                     result.append(match.groups()[0])
 250                     line = line[match.end():]
 251                 else:
 252                     match = columnpattern.match(line)
 253                     if tofloat:
 254                         try:
 255                             result.append(float(match.groups()[0]))
 256                         except (TypeError, ValueError):
 257                             result.append(match.groups()[0])
 258                     else:
 259                         result.append(match.groups()[0])
 260                     line = line[match.end():]
 261         else:
 262             if tofloat:
 263                 try:
 264                     return map(float, line.split())
 265                 except (TypeError, ValueError):
 266                     result = []
 267                     for r in line.split():
 268                         try:
 269                             result.append(float(r))
 270                         except (TypeError, ValueError):
 271                             result.append(r)
 272             else:
 273                 return line.split()
 274         return result
 275
 276     def getcachekey(self, *args):
 277         return ":".join([str(x) for x in args])
 278
 279     def __init__(self, filename,
 280                        commentpattern=defaultcommentpattern,
 281                        stringpattern=defaultstringpattern,
 282                        columnpattern=defaultcolumnpattern,
 283                        skiphead=0, skiptail=0, every=1,
 284                        **kwargs):
 285
 286         def readfile(file, title, self=self, commentpattern=commentpattern, stringpattern=stringpattern, columnpattern=columnpattern, skiphead=skiphead, skiptail=skiptail, every=every):
 287             columns = []
 288             columndata = []
 289             linenumber = 0
 290             maxcolumns = 0
 291             for line in file.readlines():
 292                 line = line.strip()
 293                 match = commentpattern.match(line)
 294                 if match:
 295                     if not len(columndata):
 296                         columns = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
 297                 else:
 298                     linedata = []
 299                     for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
 300                         linedata.append(value)
 301                     if len(linedata):
 302                         if linenumber >= skiphead and not ((linenumber - skiphead) % every):
 303                             linedata = [linenumber + 1] + linedata
 304                             if len(linedata) > maxcolumns:
 305                                 maxcolumns = len(linedata)
 306                             columndata.append(linedata)
 307                         linenumber += 1
 308             if skiptail >= every:
 309                 skip, x = divmod(skiptail, every)
 310                 del columndata[-skip:]
 311             for i in xrange(len(columndata)):
 312                 if len(columndata[i]) != maxcolumns:
 313                     columndata[i].extend([None]*(maxcolumns-len(columndata[i])))
 314             return list(columndata, title=title, addlinenumbers=0,
 315                         **dict([(column, i+1) for i, column in enumerate(columns[:maxcolumns-1])]))
 316
 317         try:
 318             filename.readlines
 319         except:
 320             # not a file-like object -> open it
 321             cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
 322             if not filecache.has_key(cachekey):
 323                 filecache[cachekey] = readfile(open(filename), filename)
 324             data.__init__(self, filecache[cachekey], **kwargs)
 325         else:
 326             data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
 327
 328
# module-level cache intended for data read from config-like files
conffilecache = {}
 330
 331 class conffile(data):
 332
 333     def __init__(self, filename, **kwargs):
 334         """read data from a config-like file
 335         - filename is a string
 336         - each row is defined by a section in the config-like file (see
 337           config module description)
 338         - the columns for each row are defined by lines in the section file;
 339           the option entries identify and name the columns
 340         - further keyword arguments are passed to the constructor of data,
 341           keyword arguments data and titles excluded"""
 342
 343         def readfile(file, title):
 344             config = ConfigParser.ConfigParser()
 345             config.optionxform = str
 346             config.readfp(file)
 347             sections = config.sections()
 348             sections.sort()
 349             columndata = [None]*len(sections)
 350             maxcolumns = 1
 351             columns = {}
 352             for i in xrange(len(sections)):
 353                 point = [sections[i]] + [None]*(maxcolumns-1)
 354                 for option in config.options(sections[i]):
 355                     value = config.get(sections[i], option)
 356                     try:
 357                         value = float(value)
 358                     except:
 359                         pass
 360                     try:
 361                         index = columns[option]
 362                     except KeyError:
 363                         columns[option] = maxcolumns
 364                         point.append(value)
 365                         maxcolumns += 1
 366                     else:
 367                         point[index] = value
 368                 columndata[i] = point
 369             # wrap result into a data instance to remove column numbers
 370             result = data(list(columndata, addlinenumbers=0, **columns), title=title)
 371             # ... but reinsert sections as linenumbers
 372             result.columndata = [[x[0] for x in columndata]]
 373             return result
 374
 375         try:
 376             filename.readlines
 377         except:
 378             # not a file-like object -> open it
 379             if not filecache.has_key(filename):
 380                 filecache[filename] = readfile(open(filename), filename)
 381             data.__init__(self, filecache[filename], **kwargs)
 382         else:
 383             data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
 384
 385
# module-level cache of data read from cbd files, so that a file needs to be
# parsed only once per cache key
cbdfilecache = {}
 387
 388 class cbdfile(data):
 389
 390     defaultstyles = [style.line()]
 391
 392     def getcachekey(self, *args):
 393         return ":".join([str(x) for x in args])
 394
 395     def __init__(self, filename, minrank=None, maxrank=None, **kwargs):
 396
 397         class cbdhead:
 398
 399             def __init__(self, file):
 400                 (self.magic,
 401                  self.dictaddr,
 402                  self.segcount,
 403                  self.segsize,
 404                  self.segmax,
 405                  self.fill) = struct.unpack("<5i20s", file.read(40))
 406                 if self.magic != 0x20770002:
 407                     raise ValueError("bad magic number")
 408
 409         class segdict:
 410
 411             def __init__(self, file, i):
 412                 self.index = i
 413                 (self.segid,
 414                  self.maxlat,
 415                  self.minlat,
 416                  self.maxlong,
 417                  self.minlong,
 418                  self.absaddr,
 419                  self.nbytes,
 420                  self.rank) = struct.unpack("<6i2h", file.read(28))
 421
 422         class segment:
 423
 424             def __init__(self, file, sd):
 425                 file.seek(sd.absaddr)
 426                 (self.orgx,
 427                  self.orgy,
 428                  self.id,
 429                  self.nstrokes,
 430                  self.dummy) = struct.unpack("<3i2h", file.read(16))
 431                 oln, olt = self.orgx, self.orgy
 432                 self.points = [(olt, oln)]
 433                 for i in range(self.nstrokes):
 434                     c1, c2 = struct.unpack("2c", file.read(2))
 435                     if ord(c2) & 0x40:
 436                         if c1 > "\177":
 437                             dy = ord(c1) - 256
 438                         else:
 439                             dy = ord(c1)
 440                         if c2 > "\177":
 441                             dx = ord(c2) - 256
 442                         else:
 443                             dx = ord(c2) - 64
 444                     else:
 445                         c3, c4, c5, c6, c7, c8 = struct.unpack("6c", file.read(6))
 446                         if c2 > "\177":
 447                             c2 = chr(ord(c2) | 0x40)
 448                         dx, dy = struct.unpack("<2i", c3+c4+c1+c2+c7+c8+c5+c6)
 449                     oln += dx
 450                     olt += dy
 451                     self.points.append((olt, oln))
 452                 sd.nstrokes = self.nstrokes
 453
 454         def readfile(file, title):
 455             h = cbdhead(file)
 456             file.seek(h.dictaddr)
 457             sds = [segdict(file, i+1) for i in range(h.segcount)]
 458             sbs = [segment(file, sd) for sd in sds]
 459
 460             # remove jumps at long +/- 180
 461             for sd, sb in zip(sds, sbs):
 462                 if sd.minlong < -150*3600 and sd.maxlong > 150*3600:
 463                     for i, (lat, long) in enumerate(sb.points):
 464                          if long < 0:
 465                              sb.points[i] = lat, long + 360*3600
 466
 467             columndata = []
 468             for sd, sb in zip(sds, sbs):
 469                 if ((minrank is None or sd.rank >= minrank) and
 470                     (maxrank is None or sd.rank <= maxrank)):
 471                     if columndata:
 472                         columndata.append((None, None))
 473                     columndata.extend([(long/3600.0, lat/3600.0)
 474                                        for lat, long in sb.points])
 475
 476             result = list(columndata, title=title)
 477             result.defaultstyles = self.defaultstyles
 478             return result
 479
 480
 481         try:
 482             filename.readlines
 483         except:
 484             # not a file-like object -> open it
 485             cachekey = self.getcachekey(filename, minrank, maxrank)
 486             if not cbdfilecache.has_key(cachekey):
 487                 cbdfilecache[cachekey] = readfile(open(filename, "rb"), filename)
 488             data.__init__(self, cbdfilecache[cachekey], **kwargs)
 489         else:
 490             data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
 491
 492
 493 class function(_data):
 494
 495     defaultstyles = [style.line()]
 496
 497     assignmentpattern = re.compile(r"\s*([a-z_][a-z0-9_]*)\s*\(\s*([a-z_][a-z0-9_]*)\s*\)\s*=", re.IGNORECASE)
 498
 499     def __init__(self, expression, title=_notitle, min=None, max=None,
 500                  points=100, context={}):
 501
 502         if title is _notitle:
 503             self.title = expression
 504         else:
 505             self.title = title
 506         self.min = min
 507         self.max = max
 508         self.numberofpoints = points
 509         self.context = context.copy() # be save on late evaluations
 510         m = self.assignmentpattern.match(expression)
 511         if m:
 512             self.yname, self.xname = m.groups()
 513             expression = expression[m.end():]
 514         else:
 515             raise ValueError("y(x)=... or similar expected")
 516         if context.has_key(self.xname):
 517             raise ValueError("xname in context")
 518         self.expression = compile(expression.strip(), __file__, "eval")
 519         self.columns = {}
 520         self.columnnames = [self.xname, self.yname]
 521
 522     def dynamiccolumns(self, graph):
 523         dynamiccolumns = {self.xname: [], self.yname: []}
 524
 525         xaxis = graph.axes[self.xname]
 526         from pyx.graph.axis import logarithmic
 527         logaxis = isinstance(xaxis.axis, logarithmic)
 528         if self.min is not None:
 529             min = self.min
 530         else:
 531             min = xaxis.data.min
 532         if self.max is not None:
 533             max = self.max
 534         else:
 535             max = xaxis.data.max
 536         if logaxis:
 537             min = math.log(min)
 538             max = math.log(max)
 539         for i in range(self.numberofpoints):
 540             x = min + (max-min)*i / (self.numberofpoints-1.0)
 541             if logaxis:
 542                 x = math.exp(x)
 543             dynamiccolumns[self.xname].append(x)
 544             self.context[self.xname] = x
 545             try:
 546                 y = eval(self.expression, _mathglobals, self.context)
 547             except (ArithmeticError, ValueError):
 548                 y = None
 549             dynamiccolumns[self.yname].append(y)
 550         return dynamiccolumns
 551
 552
 553 class functionxy(function):
 554
 555     def __init__(self, f, min=None, max=None, **kwargs):
 556         function.__init__(self, "y(x)=f(x)", context={"f": f}, min=min, max=max, **kwargs)
 557
 558
 559 class paramfunction(_data):
 560
 561     defaultstyles = [style.line()]
 562
 563     def __init__(self, varname, min, max, expression, title=_notitle, points=100, context={}):
 564         if context.has_key(varname):
 565             raise ValueError("varname in context")
 566         if title is _notitle:
 567             self.title = expression
 568         else:
 569             self.title = title
 570         varlist, expression = expression.split("=")
 571         expression = compile(expression.strip(), __file__, "eval")
 572         keys = [key.strip() for key in varlist.split(",")]
 573         self.columns = dict([(key, []) for key in keys])
 574         context = context.copy()
 575         for i in range(points):
 576             param = min + (max-min)*i / (points-1.0)
 577             context[varname] = param
 578             values = eval(expression, _mathglobals, context)
 579             for key, value in zip(keys, values):
 580                 self.columns[key].append(value)
 581         if len(keys) != len(values):
 582             raise ValueError("unpack tuple of wrong size")
 583         self.columnnames = self.columns.keys()
 584
 585
 586 class paramfunctionxy(paramfunction):
 587
 588     def __init__(self, f, min, max, **kwargs):
 589         paramfunction.__init__(self, "t", min, max, "x, y = f(t)", context={"f": f}, **kwargs)