pyx/graph/data.py

   1 # -*- coding: ISO-8859-1 -*-
   2 #
   3 #
   4 # Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
   5 # Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
   6 # Copyright (C) 2002-2005 André Wobst <wobsta@users.sourceforge.net>
   7 #
   8 # This file is part of PyX (http://pyx.sourceforge.net/).
   9 #
  10 # PyX is free software; you can redistribute it and/or modify
  11 # it under the terms of the GNU General Public License as published by
  12 # the Free Software Foundation; either version 2 of the License, or
  13 # (at your option) any later version.
  14 #
  15 # PyX is distributed in the hope that it will be useful,
  16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 # GNU General Public License for more details.
  19 #
  20 # You should have received a copy of the GNU General Public License
  21 # along with PyX; if not, write to the Free Software
  22 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
  23
  24 from __future__ import nested_scopes
  25
  26 import math, re, ConfigParser, struct, warnings
  27 from pyx import text
  28 from pyx.style import linestyle
  29 from pyx.graph import style
  30
  31 try:
  32     enumerate([])
  33 except NameError:
  34     # fallback implementation for Python 2.2 and below
  35     def enumerate(list):
  36         return zip(xrange(len(list)), list)
  37
  38 try:
  39     dict()
  40 except NameError:
  41     # fallback implementation for Python 2.1
  42     def dict(items):
  43         result = {}
  44         for key, value in items:
  45             result[key] = value
  46         return result
  47
  48
  49 def splitatvalue(value, *splitpoints):
  50     section = 0
  51     while section < len(splitpoints) and splitpoints[section] < value:
  52         section += 1
  53     if len(splitpoints) > 1:
  54         if section % 2:
  55             section = None
  56         else:
  57             section >>= 1
  58     return (section, value)
  59
  60
  61 _mathglobals = {"neg": lambda x: -x,
  62                 "abs": lambda x: x < 0 and -x or x,
  63                 "sgn": lambda x: x < 0 and -1 or 1,
  64                 "sqrt": math.sqrt,
  65                 "exp": math.exp,
  66                 "log": math.log,
  67                 "sin": math.sin,
  68                 "cos": math.cos,
  69                 "tan": math.tan,
  70                 "asin": math.asin,
  71                 "acos": math.acos,
  72                 "atan": math.atan,
  73                 "sind": lambda x: math.sin(math.pi/180*x),
  74                 "cosd": lambda x: math.cos(math.pi/180*x),
  75                 "tand": lambda x: math.tan(math.pi/180*x),
  76                 "asind": lambda x: 180/math.pi*math.asin(x),
  77                 "acosd": lambda x: 180/math.pi*math.acos(x),
  78                 "atand": lambda x: 180/math.pi*math.atan(x),
  79                 "norm": lambda x, y: math.hypot(x, y),
  80                 "splitatvalue": splitatvalue,
  81                 "pi": math.pi,
  82                 "e": math.e}
  83
  84
  85 class _data:
  86     """graph data interface
  87
  88     Graph data consists in columns, where each column might be identified by a
  89     string or an integer. Each row in the resulting table refers to a data
  90     point.
  91
  92     All methods except for the constructor should consider self and its
  93     attributes to be readonly, since the data instance might be shared between
  94     several graphs simultaniously.
  95
  96     The instance variable columns is a dictionary mapping column names to the
  97     data of the column (i.e. to a list). Only static columns (known at
  98     construction time) are contained in that dictionary. For data with numbered
  99     columns the column data is also available via the list columndata.
 100     Otherwise the columndata list should be missing and an access to a column
 101     number will fail.
 102
 103     The names of all columns (static and dynamic) must be fixed at the constructor
 104     and stated in the columnnames dictionary.
 105
 106     The instance variable title and defaultstyles contain the data title and
 107     the default styles (a list of styles), respectively.
 108     """
 109
 110     def dynamiccolumns(self, graph):
 111         """create and return dynamic columns data
 112
 113         Returns dynamic data matching the given axes (the axes range and other
 114         data might be used). The return value is a dictionary similar to the
 115         columns instance variable.
 116         """
 117         return {}
 118
 119
 120 class list(_data):
 121     "Graph data from a list of points"
 122
 123     defaultstyles = [style.symbol()]
 124
 125     def __init__(self, points, title="user provided list", addlinenumbers=1, **columns):
 126         if len(points):
 127             l = len(points[0])
 128             self.columndata = [[x] for x in points[0]]
 129             for point in points[1:]:
 130                 if l != len(point):
 131                     raise ValueError("different number of columns per point")
 132                 for i, x in enumerate(point):
 133                     self.columndata[i].append(x)
 134             for v in columns.values():
 135                 if abs(v) > l or (not addlinenumbers and abs(v) == l):
 136                     raise ValueError("column number bigger than number of columns")
 137             if addlinenumbers:
 138                 self.columndata = [range(1, len(points) + 1)] + self.columndata
 139             self.columns = dict([(key, self.columndata[i]) for key, i in columns.items()])
 140         else:
 141             self.columns = dict([(key, []) for key, i in columns])
 142         self.columnnames = self.columns.keys()
 143         self.title = title
 144
 145
 146 class _notitle:
 147     pass
 148
 149 _columnintref = re.compile(r"\$(-?\d+)", re.IGNORECASE)
 150
 151 class data(_data):
 152     "creates a new data set out of an existing data set"
 153
 154     def __init__(self, data, title=_notitle, context={}, copy=1,
 155                        replacedollar=1, columncallback="__column__", **columns):
 156         # build a nice title
 157         if title is _notitle:
 158             items = columns.items()
 159             items.sort() # we want sorted items (otherwise they would be unpredictable scrambled)
 160             self.title = "%s: %s" % (text.escapestring(data.title or "unkown source"),
 161                                      ", ".join(["%s=%s" % (text.escapestring(key),
 162                                                            text.escapestring(str(value)))
 163                                                 for key, value in items]))
 164         else:
 165             self.title = title
 166
 167         self.orgdata = data
 168         self.defaultstyles = self.orgdata.defaultstyles
 169
 170         # analyse the **columns argument
 171         self.columns = {}
 172         for columnname, value in columns.items():
 173             # search in the columns dictionary
 174             try:
 175                 self.columns[columnname] = self.orgdata.columns[value]
 176             except KeyError:
 177                 # search in the columndata list
 178                 try:
 179                     self.columns[columnname] = self.orgdata.columndata[value]
 180                 except (AttributeError, TypeError):
 181                     # value was not an valid column identifier
 182                     # i.e. take it as a mathematical expression
 183                     if replacedollar:
 184                         m = _columnintref.search(value)
 185                         while m:
 186                             value = "%s%s(%s)%s" % (value[:m.start()], columncallback, m.groups()[0], value[m.end():])
 187                             m = _columnintref.search(value)
 188                         value = value.replace("$", columncallback)
 189                     expression = compile(value.strip(), __file__, "eval")
 190                     context = context.copy()
 191                     context[columncallback] = self.columncallback
 192                     if self.orgdata.columns:
 193                         key, columndata = self.orgdata.columns.items()[0]
 194                         count = len(columndata)
 195                     elif self.orgdata.columndata:
 196                         count = len(self.orgdata.columndata[0])
 197                     else:
 198                         count = 0
 199                     newdata = []
 200                     for i in xrange(count):
 201                         self.columncallbackcount = i
 202                         for key, values in self.orgdata.columns.items():
 203                             context[key] = values[i]
 204                         try:
 205                             newdata.append(eval(expression, _mathglobals, context))
 206                         except (ArithmeticError, ValueError):
 207                             newdata.append(None)
 208                     self.columns[columnname] = newdata
 209
 210         if copy:
 211             # copy other, non-conflicting column names
 212             for columnname, columndata in self.orgdata.columns.items():
 213                 if not self.columns.has_key(columnname):
 214                     self.columns[columnname] = columndata
 215
 216         self.columnnames = self.columns.keys()
 217
 218     def columncallback(self, value):
 219         try:
 220             return self.orgdata.columndata[value][self.columncallbackcount]
 221         except:
 222             return self.orgdata.columns[value][self.columncallbackcount]
 223
 224
 225 filecache = {}
 226
 227 class file(data):
 228
 229     defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
 230     defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
 231     defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")
 232
 233     def splitline(self, line, stringpattern, columnpattern, tofloat=1):
 234         """returns a tuple created out of the string line
 235         - matches stringpattern and columnpattern, adds the first group of that
 236           match to the result and and removes those matches until the line is empty
 237         - when stringpattern matched, the result is always kept as a string
 238         - when columnpattern matched and tofloat is true, a conversion to a float
 239           is tried; when this conversion fails, the string is kept"""
 240         result = []
 241         # try to gain speed by skip matching regular expressions
 242         if line.find('"')!=-1 or \
 243            stringpattern is not self.defaultstringpattern or \
 244            columnpattern is not self.defaultcolumnpattern:
 245             while len(line):
 246                 match = stringpattern.match(line)
 247                 if match:
 248                     result.append(match.groups()[0])
 249                     line = line[match.end():]
 250                 else:
 251                     match = columnpattern.match(line)
 252                     if tofloat:
 253                         try:
 254                             result.append(float(match.groups()[0]))
 255                         except (TypeError, ValueError):
 256                             result.append(match.groups()[0])
 257                     else:
 258                         result.append(match.groups()[0])
 259                     line = line[match.end():]
 260         else:
 261             if tofloat:
 262                 try:
 263                     return map(float, line.split())
 264                 except (TypeError, ValueError):
 265                     result = []
 266                     for r in line.split():
 267                         try:
 268                             result.append(float(r))
 269                         except (TypeError, ValueError):
 270                             result.append(r)
 271             else:
 272                 return line.split()
 273         return result
 274
 275     def getcachekey(self, *args):
 276         return ":".join([str(x) for x in args])
 277
 278     def __init__(self, filename,
 279                        commentpattern=defaultcommentpattern,
 280                        stringpattern=defaultstringpattern,
 281                        columnpattern=defaultcolumnpattern,
 282                        skiphead=0, skiptail=0, every=1,
 283                        **kwargs):
 284
 285         def readfile(file, title, self=self, commentpattern=commentpattern, stringpattern=stringpattern, columnpattern=columnpattern, skiphead=skiphead, skiptail=skiptail, every=every):
 286             columns = []
 287             columndata = []
 288             linenumber = 0
 289             maxcolumns = 0
 290             for line in file.readlines():
 291                 line = line.strip()
 292                 match = commentpattern.match(line)
 293                 if match:
 294                     if not len(columndata):
 295                         columns = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
 296                 else:
 297                     linedata = []
 298                     for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
 299                         linedata.append(value)
 300                     if len(linedata):
 301                         if linenumber >= skiphead and not ((linenumber - skiphead) % every):
 302                             linedata = [linenumber + 1] + linedata
 303                             if len(linedata) > maxcolumns:
 304                                 maxcolumns = len(linedata)
 305                             columndata.append(linedata)
 306                         linenumber += 1
 307             if skiptail >= every:
 308                 skip, x = divmod(skiptail, every)
 309                 del columndata[-skip:]
 310             for i in xrange(len(columndata)):
 311                 if len(columndata[i]) != maxcolumns:
 312                     columndata[i].extend([None]*(maxcolumns-len(columndata[i])))
 313             return list(columndata, title=title, addlinenumbers=0,
 314                         **dict([(column, i+1) for i, column in enumerate(columns[:maxcolumns-1])]))
 315
 316         try:
 317             filename.readlines
 318         except:
 319             # not a file-like object -> open it
 320             cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
 321             if not filecache.has_key(cachekey):
 322                 filecache[cachekey] = readfile(open(filename), filename)
 323             data.__init__(self, filecache[cachekey], **kwargs)
 324         else:
 325             data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
 326
 327
 328 conffilecache = {}
 329
 330 class conffile(data):
 331
 332     def __init__(self, filename, **kwargs):
 333         """read data from a config-like file
 334         - filename is a string
 335         - each row is defined by a section in the config-like file (see
 336           config module description)
 337         - the columns for each row are defined by lines in the section file;
 338           the option entries identify and name the columns
 339         - further keyword arguments are passed to the constructor of data,
 340           keyword arguments data and titles excluded"""
 341
 342         def readfile(file, title):
 343             config = ConfigParser.ConfigParser()
 344             config.optionxform = str
 345             config.readfp(file)
 346             sections = config.sections()
 347             sections.sort()
 348             columndata = [None]*len(sections)
 349             maxcolumns = 1
 350             columns = {}
 351             for i in xrange(len(sections)):
 352                 point = [sections[i]] + [None]*(maxcolumns-1)
 353                 for option in config.options(sections[i]):
 354                     value = config.get(sections[i], option)
 355                     try:
 356                         value = float(value)
 357                     except:
 358                         pass
 359                     try:
 360                         index = columns[option]
 361                     except KeyError:
 362                         columns[option] = maxcolumns
 363                         point.append(value)
 364                         maxcolumns += 1
 365                     else:
 366                         point[index] = value
 367                 columndata[i] = point
 368             # wrap result into a data instance to remove column numbers
 369             result = data(list(columndata, addlinenumbers=0, **columns), title=title)
 370             # ... but reinsert sections as linenumbers
 371             result.columndata = [[x[0] for x in columndata]]
 372             return result
 373
 374         try:
 375             filename.readlines
 376         except:
 377             # not a file-like object -> open it
 378             if not filecache.has_key(filename):
 379                 filecache[filename] = readfile(open(filename), filename)
 380             data.__init__(self, filecache[filename], **kwargs)
 381         else:
 382             data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
 383
 384
 385 cbdfilecache = {}
 386
 387 class cbdfile(data):
 388
 389     defaultstyles = [style.line()]
 390
 391     def getcachekey(self, *args):
 392         return ":".join([str(x) for x in args])
 393
 394     def __init__(self, filename, minrank=None, maxrank=None, **kwargs):
 395
 396         class cbdhead:
 397
 398             def __init__(self, file):
 399                 (self.magic,
 400                  self.dictaddr,
 401                  self.segcount,
 402                  self.segsize,
 403                  self.segmax,
 404                  self.fill) = struct.unpack("<5i20s", file.read(40))
 405                 if self.magic != 0x20770002:
 406                     raise ValueError("bad magic number")
 407
 408         class segdict:
 409
 410             def __init__(self, file, i):
 411                 self.index = i
 412                 (self.segid,
 413                  self.maxlat,
 414                  self.minlat,
 415                  self.maxlong,
 416                  self.minlong,
 417                  self.absaddr,
 418                  self.nbytes,
 419                  self.rank) = struct.unpack("<6i2h", file.read(28))
 420
 421         class segment:
 422
 423             def __init__(self, file, sd):
 424                 file.seek(sd.absaddr)
 425                 (self.orgx,
 426                  self.orgy,
 427                  self.id,
 428                  self.nstrokes,
 429                  self.dummy) = struct.unpack("<3i2h", file.read(16))
 430                 oln, olt = self.orgx, self.orgy
 431                 self.points = [(olt, oln)]
 432                 for i in range(self.nstrokes):
 433                     c1, c2 = struct.unpack("2c", file.read(2))
 434                     if ord(c2) & 0x40:
 435                         if c1 > "\177":
 436                             dy = ord(c1) - 256
 437                         else:
 438                             dy = ord(c1)
 439                         if c2 > "\177":
 440                             dx = ord(c2) - 256
 441                         else:
 442                             dx = ord(c2) - 64
 443                     else:
 444                         c3, c4, c5, c6, c7, c8 = struct.unpack("6c", file.read(6))
 445                         if c2 > "\177":
 446                             c2 = chr(ord(c2) | 0x40)
 447                         dx, dy = struct.unpack("<2i", c3+c4+c1+c2+c7+c8+c5+c6)
 448                     oln += dx
 449                     olt += dy
 450                     self.points.append((olt, oln))
 451                 sd.nstrokes = self.nstrokes
 452
 453         def readfile(file, title):
 454             h = cbdhead(file)
 455             file.seek(h.dictaddr)
 456             sds = [segdict(file, i+1) for i in range(h.segcount)]
 457             sbs = [segment(file, sd) for sd in sds]
 458
 459             # remove jumps at long +/- 180
 460             for sd, sb in zip(sds, sbs):
 461                 if sd.minlong < -150*3600 and sd.maxlong > 150*3600:
 462                     for i, (lat, long) in enumerate(sb.points):
 463                          if long < 0:
 464                              sb.points[i] = lat, long + 360*3600
 465
 466             columndata = []
 467             for sd, sb in zip(sds, sbs):
 468                 if ((minrank is None or sd.rank >= minrank) and
 469                     (maxrank is None or sd.rank <= maxrank)):
 470                     if columndata:
 471                         columndata.append((None, None))
 472                     columndata.extend([(long/3600.0, lat/3600.0)
 473                                        for lat, long in sb.points])
 474
 475             result = list(columndata, title=title)
 476             result.defaultstyles = self.defaultstyles
 477             return result
 478
 479
 480         try:
 481             filename.readlines
 482         except:
 483             # not a file-like object -> open it
 484             cachekey = self.getcachekey(filename, minrank, maxrank)
 485             if not cbdfilecache.has_key(cachekey):
 486                 cbdfilecache[cachekey] = readfile(open(filename, "rb"), filename)
 487             data.__init__(self, cbdfilecache[cachekey], **kwargs)
 488         else:
 489             data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
 490
 491
 492 class function(_data):
 493
 494     defaultstyles = [style.line()]
 495
 496     assignmentpattern = re.compile(r"\s*([a-z_][a-z0-9_]*)\s*\(\s*([a-z_][a-z0-9_]*)\s*\)\s*=", re.IGNORECASE)
 497
 498     def __init__(self, expression, title=_notitle, min=None, max=None,
 499                  points=100, context={}):
 500
 501         if title is _notitle:
 502             self.title = expression
 503         else:
 504             self.title = title
 505         self.min = min
 506         self.max = max
 507         self.numberofpoints = points
 508         self.context = context.copy() # be save on late evaluations
 509         m = self.assignmentpattern.match(expression)
 510         if m:
 511             self.yname, self.xname = m.groups()
 512             expression = expression[m.end():]
 513         else:
 514             raise ValueError("y(x)=... or similar expected")
 515         if context.has_key(self.xname):
 516             raise ValueError("xname in context")
 517         self.expression = compile(expression.strip(), __file__, "eval")
 518         self.columns = {}
 519         self.columnnames = [self.xname, self.yname]
 520
 521     def dynamiccolumns(self, graph):
 522         dynamiccolumns = {self.xname: [], self.yname: []}
 523
 524         xaxis = graph.axes[self.xname]
 525         from pyx.graph.axis import logarithmic
 526         logaxis = isinstance(xaxis.axis, logarithmic)
 527         if self.min is not None:
 528             min = self.min
 529         else:
 530             min = xaxis.data.min
 531         if self.max is not None:
 532             max = self.max
 533         else:
 534             max = xaxis.data.max
 535         if logaxis:
 536             min = math.log(min)
 537             max = math.log(max)
 538         for i in range(self.numberofpoints):
 539             x = min + (max-min)*i / (self.numberofpoints-1.0)
 540             if logaxis:
 541                 x = math.exp(x)
 542             dynamiccolumns[self.xname].append(x)
 543             self.context[self.xname] = x
 544             try:
 545                 y = eval(self.expression, _mathglobals, self.context)
 546             except (ArithmeticError, ValueError):
 547                 y = None
 548             dynamiccolumns[self.yname].append(y)
 549         return dynamiccolumns
 550
 551
 552 class functionxy(function):
 553
 554     def __init__(self, f, min=None, max=None, **kwargs):
 555         function.__init__(self, "y(x)=f(x)", context={"f": f}, min=min, max=max, **kwargs)
 556
 557
 558 class paramfunction(_data):
 559
 560     defaultstyles = [style.line()]
 561
 562     def __init__(self, varname, min, max, expression, title=_notitle, points=100, context={}):
 563         if context.has_key(varname):
 564             raise ValueError("varname in context")
 565         if title is _notitle:
 566             self.title = expression
 567         else:
 568             self.title = title
 569         varlist, expression = expression.split("=")
 570         expression = compile(expression.strip(), __file__, "eval")
 571         keys = [key.strip() for key in varlist.split(",")]
 572         self.columns = dict([(key, []) for key in keys])
 573         context = context.copy()
 574         for i in range(points):
 575             param = min + (max-min)*i / (points-1.0)
 576             context[varname] = param
 577             values = eval(expression, _mathglobals, context)
 578             for key, value in zip(keys, values):
 579                 self.columns[key].append(value)
 580         if len(keys) != len(values):
 581             raise ValueError("unpack tuple of wrong size")
 582         self.columnnames = self.columns.keys()
 583
 584
 585 class paramfunctionxy(paramfunction):
 586
 587     def __init__(self, f, min, max, **kwargs):
 588         paramfunction.__init__(self, "t", min, max, "x, y = f(t)", context={"f": f}, **kwargs)