# -*- encoding: utf-8 -*-
#
# Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
# Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
# Copyright (C) 2002-2012 André Wobst <wobsta@users.sourceforge.net>
#
# This file is part of PyX (http://pyx.sourceforge.net/).
#
# PyX is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# PyX is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PyX; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

import math, re, configparser, struct
from pyx import text
from . import style

builtinlist = list

def splitatvalue(value, *splitpoints):
    section = 0
    while section < len(splitpoints) and splitpoints[section] < value:
        section += 1
    if len(splitpoints) > 1:
        if section % 2:
            section = None
        else:
            section >>= 1
    return (section, value)

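# A quick sketch of splitatvalue's return values (derived from the code above,
# not part of the module): with a single splitpoint the section index is kept
# as-is, with pairs of splitpoints the gaps between the pairs map to None:
#
#     splitatvalue(2, 5)       # -> (0, 2)    value below the splitpoint
#     splitatvalue(7, 5)       # -> (1, 7)    value above the splitpoint
#     splitatvalue(1, 3, 7)    # -> (0, 1)    left of the pair 3..7
#     splitatvalue(5, 3, 7)    # -> (None, 5) inside the gap 3..7
#     splitatvalue(9, 3, 7)    # -> (1, 9)    right of the pair 3..7
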
_mathglobals = {"neg": lambda x: -x,
                "abs": lambda x: x < 0 and -x or x,
                "sgn": lambda x: x < 0 and -1 or 1,
                "sqrt": math.sqrt,
                "exp": math.exp,
                "log": math.log,
                "sin": math.sin,
                "cos": math.cos,
                "tan": math.tan,
                "asin": math.asin,
                "acos": math.acos,
                "atan": math.atan,
                "sind": lambda x: math.sin(math.pi/180*x),
                "cosd": lambda x: math.cos(math.pi/180*x),
                "tand": lambda x: math.tan(math.pi/180*x),
                "asind": lambda x: 180/math.pi*math.asin(x),
                "acosd": lambda x: 180/math.pi*math.acos(x),
                "atand": lambda x: 180/math.pi*math.atan(x),
                "norm": lambda x, y: math.hypot(x, y),
                "splitatvalue": splitatvalue,
                "pi": math.pi,
                "e": math.e}

class _data:
    """graph data interface

    Graph data consists of columns, where each column might be identified by a
    string or an integer. Each row in the resulting table refers to a data
    point.

    All methods except for the constructor should consider self and its
    attributes to be readonly, since the data instance might be shared between
    several graphs simultaneously.

    The instance variable columns is a dictionary mapping column names to the
    data of the column (i.e. to a list). Only static columns (known at
    construction time) are contained in that dictionary. For data with numbered
    columns the column data is also available via the list columndata.
    Otherwise the columndata list should be missing and an access to a column
    number will fail.

    The names of all columns (static and dynamic) must be fixed by the
    constructor and stated in the columnnames list.

    The instance variables title and defaultstyles contain the data title and
    the default styles (a list of styles), respectively. If defaultstyles is
    None, the data cannot be plotted without user provided styles.
    """

    def dynamiccolumns(self, graph, axisnames):
        """create and return dynamic columns data

        Returns dynamic data matching the given axes (the axes ranges and other
        data might be used). The return value is a dictionary similar to the
        columns instance variable. However, the static and dynamic data do not
        need to be correlated in any way, i.e. the number of data points in
        self.columns might differ from the number of data points represented by
        the return value of the dynamiccolumns method.
        """
        return {}


defaultsymbols = [style.symbol()]
defaultlines = [style.line()]

class values(_data):

    defaultstyles = defaultsymbols

    def __init__(self, title="user provided values", **columns):
        for i, values in enumerate(list(columns.values())):
            if i and len(values) != l:
                raise ValueError("different number of values")
            else:
                l = len(values)
        self.columns = columns
        self.columnnames = list(columns.keys())
        self.title = title

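# A usage sketch (not part of the module): columns are passed directly as
# keyword arguments; the graph setup is assumed boilerplate from pyx.graph:
#
#     from pyx import graph
#     g = graph.graphxy(width=8)
#     g.plot(graph.data.values(x=[1, 2, 3, 4], y=[1, 4, 9, 16]))
#     g.writeEPSfile("values_example")
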
class points(_data):
    "Graph data from a list of points"

    defaultstyles = defaultsymbols

    def __init__(self, points, title="user provided points", addlinenumbers=1, **columns):
        if len(points):
            l = len(points[0])
            self.columndata = [[x] for x in points[0]]
            for point in points[1:]:
                if l != len(point):
                    raise ValueError("different number of columns per point")
                for i, x in enumerate(point):
                    self.columndata[i].append(x)
            for v in list(columns.values()):
                if abs(v) > l or (not addlinenumbers and abs(v) == l):
                    raise ValueError("column number bigger than number of columns")
            if addlinenumbers:
                self.columndata = [list(range(1, len(points) + 1))] + self.columndata
            self.columns = dict([(key, self.columndata[i]) for key, i in list(columns.items())])
        else:
            self.columns = dict([(key, []) for key, i in list(columns.items())])
        self.columnnames = list(self.columns.keys())
        self.title = title

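# A usage sketch (not part of the module): rows are given as a list of tuples
# and columns are selected by number; with the default addlinenumbers=1 the
# line number becomes column 0, so the data columns start at 1:
#
#     from pyx import graph
#     g = graph.graphxy(width=8)
#     g.plot(graph.data.points([(1, 0.5), (2, 0.7), (3, 0.2)], x=1, y=2))
#     g.writeEPSfile("points_example")
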
class _notitle:
    pass

_columnintref = re.compile(r"\$(-?\d+)", re.IGNORECASE)

class data(_data):
    "creates a new data set out of an existing data set"

    def __init__(self, data, title=_notitle, context={}, copy=1,
                       replacedollar=1, columncallback="__column__", **columns):
        # build a nice title
        if title is _notitle:
            items = list(columns.items())
            items.sort() # we want sorted items (otherwise they would be scrambled unpredictably)
            self.title = "%s: %s" % (text.escapestring(data.title or "unknown source"),
                                     ", ".join(["%s=%s" % (text.escapestring(key),
                                                           text.escapestring(str(value)))
                                                for key, value in items]))
        else:
            self.title = title

        self.orgdata = data
        self.defaultstyles = self.orgdata.defaultstyles

        # analyse the **columns argument
        self.columns = {}
        for columnname, value in list(columns.items()):
            # search in the columns dictionary
            try:
                self.columns[columnname] = self.orgdata.columns[value]
            except KeyError:
                # search in the columndata list
                try:
                    self.columns[columnname] = self.orgdata.columndata[value]
                except (AttributeError, TypeError):
                    # value was not a valid column identifier,
                    # i.e. take it as a mathematical expression
                    if replacedollar:
                        m = _columnintref.search(value)
                        while m:
                            value = "%s%s(%s)%s" % (value[:m.start()], columncallback, m.groups()[0], value[m.end():])
                            m = _columnintref.search(value)
                        value = value.replace("$", columncallback)
                    expression = compile(value.strip(), __file__, "eval")
                    context = context.copy()
                    context[columncallback] = self.columncallback
                    if self.orgdata.columns:
                        key, columndata = list(self.orgdata.columns.items())[0]
                        count = len(columndata)
                    elif self.orgdata.columndata:
                        count = len(self.orgdata.columndata[0])
                    else:
                        count = 0
                    newdata = []
                    for i in range(count):
                        self.columncallbackcount = i
                        for key, values in list(self.orgdata.columns.items()):
                            context[key] = values[i]
                        try:
                            newdata.append(eval(expression, _mathglobals, context))
                        except (ArithmeticError, ValueError):
                            newdata.append(None)
                    self.columns[columnname] = newdata

        if copy:
            # copy other, non-conflicting column names
            for columnname, columndata in list(self.orgdata.columns.items()):
                if columnname not in self.columns:
                    self.columns[columnname] = columndata

        self.columnnames = list(self.columns.keys())

    def columncallback(self, value):
        try:
            return self.orgdata.columndata[value][self.columncallbackcount]
        except:
            return self.orgdata.columns[value][self.columncallbackcount]

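# A usage sketch (not part of the module): values that are neither column names
# nor column numbers are treated as expressions evaluated per row, with the
# names from _mathglobals available; "$<number>" refers to a numbered column of
# the original data (see replacedollar above). The file name is made up:
#
#     from pyx import graph
#     orig = graph.data.file("input.dat", x=1, y=2)               # columns by number
#     scaled = graph.data.data(orig, x="x", y="log(abs(y) + 1)")  # derived column
#     ratio = graph.data.file("input.dat", x=1, r="$3/$2")        # dollar references
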
filecache = {}

class file(data):

    defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
    defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
    defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")

    def splitline(self, line, stringpattern, columnpattern, tofloat=1):
        """returns a list created out of the string line
        - matches stringpattern and columnpattern, adds the first group of each
          match to the result and removes the matched part until the line is empty
        - when stringpattern matched, the result is always kept as a string
        - when columnpattern matched and tofloat is true, a conversion to a float
          is tried; when this conversion fails, the string is kept"""
        result = []
        # try to gain speed by skipping the regular expression matching
        if line.find('"')!=-1 or \
           stringpattern is not self.defaultstringpattern or \
           columnpattern is not self.defaultcolumnpattern:
            while len(line):
                match = stringpattern.match(line)
                if match:
                    result.append(match.groups()[0])
                    line = line[match.end():]
                else:
                    match = columnpattern.match(line)
                    if tofloat:
                        try:
                            result.append(float(match.groups()[0]))
                        except (TypeError, ValueError):
                            result.append(match.groups()[0])
                    else:
                        result.append(match.groups()[0])
                    line = line[match.end():]
        else:
            if tofloat:
                try:
                    return list(map(float, line.split()))
                except (TypeError, ValueError):
                    result = []
                    for r in line.split():
                        try:
                            result.append(float(r))
                        except (TypeError, ValueError):
                            result.append(r)
            else:
                return line.split()
        return result
    def getcachekey(self, *args):
        return ":".join([str(x) for x in args])

    def __init__(self, filename,
                       commentpattern=defaultcommentpattern,
                       stringpattern=defaultstringpattern,
                       columnpattern=defaultcolumnpattern,
                       skiphead=0, skiptail=0, every=1,
                       **kwargs):

        def readfile(file, title, self=self, commentpattern=commentpattern, stringpattern=stringpattern, columnpattern=columnpattern, skiphead=skiphead, skiptail=skiptail, every=every):
            columns = []
            columndata = []
            linenumber = 0
            maxcolumns = 0
            for line in file.readlines():
                line = line.strip()
                match = commentpattern.match(line)
                if match:
                    if not len(columndata):
                        columns = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
                else:
                    linedata = []
                    for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
                        linedata.append(value)
                    if len(linedata):
                        if linenumber >= skiphead and not ((linenumber - skiphead) % every):
                            linedata = [linenumber + 1] + linedata
                            if len(linedata) > maxcolumns:
                                maxcolumns = len(linedata)
                            columndata.append(linedata)
                        linenumber += 1
            if skiptail >= every:
                skip, x = divmod(skiptail, every)
                del columndata[-skip:]
            for i in range(len(columndata)):
                if len(columndata[i]) != maxcolumns:
                    columndata[i].extend([None]*(maxcolumns-len(columndata[i])))
            return points(columndata, title=title, addlinenumbers=0,
                          **dict([(column, i+1) for i, column in enumerate(columns[:maxcolumns-1])]))

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
            if cachekey not in filecache:
                with open(filename) as f:
                    filecache[cachekey] = readfile(f, filename)
            data.__init__(self, filecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)

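# A usage sketch (not part of the module): read a whitespace separated data
# file; columns can be addressed by number or, if a comment line above the data
# names them, by name. The file name and column names are made up:
#
#     from pyx import graph
#     g = graph.graphxy(width=8)
#     g.plot(graph.data.file("measurements.dat", x=1, y=3))
#     # or, given a header line like "# time temperature":
#     # g.plot(graph.data.file("measurements.dat", x="time", y="temperature"))
#     g.writeEPSfile("file_example")
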
conffilecache = {}

class conffile(data):

    def __init__(self, filename, **kwargs):
        """read data from a config-like file
        - filename is a string
        - each row is defined by a section in the config-like file (see the
          configparser module description)
        - the columns for each row are defined by the lines of the section;
          the option entries identify and name the columns
        - further keyword arguments are passed to the constructor of data,
          except for the keyword arguments data and title"""

        def readfile(file, title):
            config = configparser.ConfigParser(strict=False)
            config.optionxform = str
            config.read_file(file)
            sections = config.sections()
            sections.sort()
            columndata = [None]*len(sections)
            maxcolumns = 1
            columns = {}
            for i in range(len(sections)):
                point = [sections[i]] + [None]*(maxcolumns-1)
                for option in config.options(sections[i]):
                    value = config.get(sections[i], option)
                    try:
                        value = float(value)
                    except:
                        pass
                    try:
                        index = columns[option]
                    except KeyError:
                        columns[option] = maxcolumns
                        point.append(value)
                        maxcolumns += 1
                    else:
                        point[index] = value
                columndata[i] = point
            # wrap result into a data instance to remove column numbers
            result = data(points(columndata, addlinenumbers=0, **columns), title=title)
            # ... but reinsert sections as linenumbers
            result.columndata = [[x[0] for x in columndata]]
            return result

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            if filename not in conffilecache:
                with open(filename) as f:
                    conffilecache[filename] = readfile(f, filename)
            data.__init__(self, conffilecache[filename], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)

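# A usage sketch (not part of the module): each section of a config-like file
# becomes one row and each option becomes a named column. The file name and
# option names below are made up:
#
#     # samples.cfg
#     # [run1]
#     # voltage = 1.3
#     # current = 0.7
#     # [run2]
#     # voltage = 2.1
#     # current = 1.1
#
#     from pyx import graph
#     d = graph.data.conffile("samples.cfg", x="voltage", y="current")
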
cbdfilecache = {}

class cbdfile(data):

    defaultstyles = defaultlines

    def getcachekey(self, *args):
        return ":".join([str(x) for x in args])

    def __init__(self, filename, minrank=None, maxrank=None, **kwargs):

        class cbdhead:

            def __init__(self, file):
                (self.magic,
                 self.dictaddr,
                 self.segcount,
                 self.segsize,
                 self.segmax,
                 self.fill) = struct.unpack("<5i20s", file.read(40))
                if self.magic != 0x20770002:
                    raise ValueError("bad magic number")

        class segdict:

            def __init__(self, file, i):
                self.index = i
                (self.segid,
                 self.maxlat,
                 self.minlat,
                 self.maxlong,
                 self.minlong,
                 self.absaddr,
                 self.nbytes,
                 self.rank) = struct.unpack("<6i2h", file.read(28))

        class segment:

            def __init__(self, file, sd):
                file.seek(sd.absaddr)
                (self.orgx,
                 self.orgy,
                 self.id,
                 self.nstrokes,
                 self.dummy) = struct.unpack("<3i2h", file.read(16))
                oln, olt = self.orgx, self.orgy
                self.points = [(olt, oln)]
                for i in range(self.nstrokes):
                    c1, c2 = struct.unpack("2c", file.read(2))
                    if ord(c2) & 0x40:
                        if c1 > b"\177":
                            dy = ord(c1) - 256
                        else:
                            dy = ord(c1)
                        if c2 > b"\177":
                            dx = ord(c2) - 256
                        else:
                            dx = ord(c2) - 64
                    else:
                        c3, c4, c5, c6, c7, c8 = struct.unpack("6c", file.read(6))
                        if c2 > b"\177":
                            c2 = bytes([ord(c2) | 0x40])
                        dx, dy = struct.unpack("<2i", c3+c4+c1+c2+c7+c8+c5+c6)
                    oln += dx
                    olt += dy
                    self.points.append((olt, oln))
                sd.nstrokes = self.nstrokes

        def readfile(file, title):
            h = cbdhead(file)
            file.seek(h.dictaddr)
            sds = [segdict(file, i+1) for i in range(h.segcount)]
            sbs = [segment(file, sd) for sd in sds]

            # remove jumps at long +/- 180
            for sd, sb in zip(sds, sbs):
                if sd.minlong < -150*3600 and sd.maxlong > 150*3600:
                    for i, (lat, lon) in enumerate(sb.points):
                        if lon < 0:
                            sb.points[i] = lat, lon + 360*3600

            columndata = []
            for sd, sb in zip(sds, sbs):
                if ((minrank is None or sd.rank >= minrank) and
                    (maxrank is None or sd.rank <= maxrank)):
                    if columndata:
                        columndata.append((None, None))
                    columndata.extend([(lon/3600.0, lat/3600.0)
                                       for lat, lon in sb.points])

            result = points(columndata, title=title)
            result.defaultstyles = self.defaultstyles
            return result

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, minrank, maxrank)
            if cachekey not in cbdfilecache:
                with open(filename, "rb") as f:
                    cbdfilecache[cachekey] = readfile(f, filename)
            data.__init__(self, cbdfilecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)

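# A usage sketch (not part of the module): cbdfile reads a binary segment file
# (the lat/long and rank fields suggest coastline/boundary data) and yields
# longitude/latitude points in degrees, with (None, None) pairs separating the
# segments; minrank/maxrank select detail levels. The file name and the column
# selection by number are assumptions:
#
#     from pyx import graph
#     coast = graph.data.cbdfile("coastline.cbd", maxrank=2, x=1, y=2)
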
class function(_data):

    defaultstyles = defaultlines

    assignmentpattern = re.compile(r"\s*([a-z_][a-z0-9_]*)\s*\(\s*([a-z_][a-z0-9_]*)\s*\)\s*=", re.IGNORECASE)

    def __init__(self, expression, title=_notitle, min=None, max=None,
                       points=100, context={}):
        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        self.min = min
        self.max = max
        self.numberofpoints = points
        self.context = context.copy() # be safe on late evaluations
        m = self.assignmentpattern.match(expression)
        if m:
            self.yname, self.xname = m.groups()
            expression = expression[m.end():]
        else:
            raise ValueError("y(x)=... or similar expected")
        if self.xname in context:
            raise ValueError("xname in context")
        self.expression = compile(expression.strip(), __file__, "eval")
        self.columns = {}
        self.columnnames = [self.xname, self.yname]

    def dynamiccolumns(self, graph, axisnames):
        dynamiccolumns = {self.xname: [], self.yname: []}

        xaxis = graph.axes[axisnames.get(self.xname, self.xname)]
        from pyx.graph.axis import logarithmic
        logaxis = isinstance(xaxis.axis, logarithmic)
        if self.min is not None:
            min = self.min
        else:
            min = xaxis.data.min
        if self.max is not None:
            max = self.max
        else:
            max = xaxis.data.max
        if logaxis:
            min = math.log(min)
            max = math.log(max)
        for i in range(self.numberofpoints):
            x = min + (max-min)*i / (self.numberofpoints-1.0)
            if logaxis:
                x = math.exp(x)
            dynamiccolumns[self.xname].append(x)
            self.context[self.xname] = x
            try:
                y = eval(self.expression, _mathglobals, self.context)
            except (ArithmeticError, ValueError):
                y = None
            dynamiccolumns[self.yname].append(y)
        return dynamiccolumns

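# A usage sketch (not part of the module): the expression "y(x)=..." names the
# dependent and independent columns; x values are sampled along the x axis
# range (or the given min/max) and the expression is evaluated with the names
# from _mathglobals available:
#
#     from pyx import graph
#     g = graph.graphxy(width=8)
#     g.plot(graph.data.function("y(x)=sin(x)/x", min=0.1, max=15))
#     g.writeEPSfile("function_example")
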
class functionxy(function):

    def __init__(self, f, min=None, max=None, **kwargs):
        function.__init__(self, "y(x)=f(x)", context={"f": f}, min=min, max=max, **kwargs)

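# A usage sketch (not part of the module): functionxy wraps a Python callable
# instead of an expression string:
#
#     from pyx import graph
#     g = graph.graphxy(width=8)
#     g.plot(graph.data.functionxy(lambda x: x*x - 1, min=-2, max=2))
#     g.writeEPSfile("functionxy_example")
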
class paramfunction(_data):

    defaultstyles = defaultlines

    def __init__(self, varname, min, max, expression, title=_notitle, points=100, context={}):
        if varname in context:
            raise ValueError("varname in context")
        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        varlist, expression = expression.split("=")
        expression = compile(expression.strip(), __file__, "eval")
        keys = [key.strip() for key in varlist.split(",")]
        self.columns = dict([(key, []) for key in keys])
        context = context.copy()
        for i in range(points):
            param = min + (max-min)*i / (points-1.0)
            context[varname] = param
            values = eval(expression, _mathglobals, context)
            for key, value in zip(keys, values):
                self.columns[key].append(value)
            if len(keys) != len(values):
                raise ValueError("unpack tuple of wrong size")
        self.columnnames = list(self.columns.keys())

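# A usage sketch (not part of the module): a parametric curve; the left-hand
# side of the expression names the resulting columns, the parameter runs from
# min to max and the names from _mathglobals (cos, sin, pi, ...) are available:
#
#     import math
#     from pyx import graph
#     g = graph.graphxy(width=8)
#     g.plot(graph.data.paramfunction("k", 0, 2*math.pi, "x, y = cos(k), sin(k)"))
#     g.writeEPSfile("paramfunction_example")
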
class paramfunctionxy(paramfunction):

    def __init__(self, f, min, max, **kwargs):
        paramfunction.__init__(self, "t", min, max, "x, y = f(t)", context={"f": f}, **kwargs)

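# A usage sketch (not part of the module): the callable returns an (x, y) tuple
# for each parameter value:
#
#     import math
#     from pyx import graph
#     spiral = graph.data.paramfunctionxy(lambda t: (t*math.cos(t), t*math.sin(t)), 0, 20)
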
class _nodefaultstyles:
    pass

class join(_data):
    "creates a new data set by joining a list of data; it does *not* combine points, but fills missing data with None where necessary"

    def merge_lists(self, lists):
        "merges list items w/o duplications, the resulting order is arbitrary"
        result = set()
        for l in lists:
            result.update(set(l))
        return builtinlist(result)

    def merge_dicts(self, dicts):
        """merge dicts containing lists as values (with an equal number of items
        per list in each dict), missing data is padded by None"""
        keys = self.merge_lists([list(d.keys()) for d in dicts])
        empties = []
        for d in dicts:
            if len(list(d.keys())) == len(keys):
                empties.append(None) # won't be needed later on
            else:
                values = list(d.values())
                if len(values):
                    empties.append([None]*len(values[0]))
                else:
                    # has no data at all -> do not add anything
                    empties.append([])
        result = {}
        for key in keys:
            result[key] = []
            for d, e in zip(dicts, empties):
                result[key].extend(d.get(key, e))
        return result

    def __init__(self, data, title=_notitle, defaultstyles=_nodefaultstyles):
        """takes a list of data, a title (if it should not be autoconstructed),
        and a defaultstyles list if there is no common defaultstyles setting
        in the provided data"""
        assert len(data)
        self.data = data
        self.columnnames = self.merge_lists([d.columnnames for d in data])
        self.columns = self.merge_dicts([d.columns for d in data])
        if title is _notitle:
            self.title = " + ".join([d.title for d in data])
        else:
            self.title = title
        if defaultstyles is _nodefaultstyles:
            self.defaultstyles = data[0].defaultstyles
            for d in data[1:]:
                if d.defaultstyles is not self.defaultstyles:
                    self.defaultstyles = None
                    break
        else:
            self.defaultstyles = defaultstyles

    def dynamiccolumns(self, graph, axisnames):
        return self.merge_dicts([d.dynamiccolumns(graph, axisnames) for d in self.data])
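
# A usage sketch (not part of the module): join two data sets so that they can
# be handled as a single item; columns missing in one set are padded with None.
# The file names are made up:
#
#     from pyx import graph
#     d1 = graph.data.file("run1.dat", x=1, y=2)
#     d2 = graph.data.file("run2.dat", x=1, y=2)
#     both = graph.data.join([d1, d2])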