restore old behaviour: config parser was not in strict mode
[PyX.git] / pyx / graph / data.py

# -*- encoding: utf-8 -*-
#
#
# Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
# Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
# Copyright (C) 2002-2012 André Wobst <wobsta@users.sourceforge.net>
#
# This file is part of PyX (http://pyx.sourceforge.net/).
#
# PyX is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# PyX is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PyX; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

import math, re, configparser, struct, warnings
from pyx import text
from . import style

builtinlist = list


def splitatvalue(value, *splitpoints):
    section = 0
    while section < len(splitpoints) and splitpoints[section] < value:
        section += 1
    if len(splitpoints) > 1:
        if section % 2:
            section = None
        else:
            section >>= 1
    return (section, value)
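

# Editorial note (not part of the original source): splitatvalue is exposed to
# column expressions via _mathglobals below and splits data into regions at the
# given split points.  With two split points it returns (0, value) below the
# first point, (None, value) between the two, and (1, value) above the second,
# e.g. splitatvalue(1, 2, 8) == (0, 1) and splitatvalue(5, 2, 8) == (None, 5).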


_mathglobals = {"neg": lambda x: -x,
                "abs": lambda x: x < 0 and -x or x,
                "sgn": lambda x: x < 0 and -1 or 1,
                "sqrt": math.sqrt,
                "exp": math.exp,
                "log": math.log,
                "sin": math.sin,
                "cos": math.cos,
                "tan": math.tan,
                "asin": math.asin,
                "acos": math.acos,
                "atan": math.atan,
                "sind": lambda x: math.sin(math.pi/180*x),
                "cosd": lambda x: math.cos(math.pi/180*x),
                "tand": lambda x: math.tan(math.pi/180*x),
                "asind": lambda x: 180/math.pi*math.asin(x),
                "acosd": lambda x: 180/math.pi*math.acos(x),
                "atand": lambda x: 180/math.pi*math.atan(x),
                "norm": lambda x, y: math.hypot(x, y),
                "splitatvalue": splitatvalue,
                "pi": math.pi,
                "e": math.e}


class _data:
    """graph data interface

    Graph data consists of columns, where each column might be identified by a
    string or an integer. Each row in the resulting table refers to a data
    point.

    All methods except for the constructor should consider self and its
    attributes to be readonly, since the data instance might be shared between
    several graphs simultaneously.

    The instance variable columns is a dictionary mapping column names to the
    data of the column (i.e. to a list). Only static columns (known at
    construction time) are contained in that dictionary. For data with numbered
    columns the column data is also available via the list columndata.
    Otherwise the columndata list should be missing, and an access by column
    number will fail.

    The names of all columns (static and dynamic) must be fixed by the
    constructor and stated in the columnnames list.

    The instance variables title and defaultstyles contain the data title and
    the default styles (a list of styles), respectively. If defaultstyles is None,
    the data cannot be plotted without user provided styles.
    """

    def dynamiccolumns(self, graph, axisnames):
        """create and return dynamic columns data

        Returns dynamic data matching the given axes (the axes range and other
        data might be used). The return value is a dictionary similar to the
        columns instance variable. However, the static and dynamic data does
        not need to be correlated in any way, i.e. the number of data points in
        self.columns might differ from the number of data points represented by
        the return value of the dynamiccolumns method.
        """
        return {}
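

# Editorial sketch (not part of the original source): a minimal static data
# instance conforming to the interface above would carry something like
#     self.columns = {"x": [1, 2, 3], "y": [1, 4, 9]}
#     self.columnnames = ["x", "y"]
#     self.title = "some title"
#     self.defaultstyles = [style.symbol()]
# while dynamic columns are only created on demand by dynamiccolumns().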


defaultsymbols = [style.symbol()]
defaultlines = [style.line()]


class values(_data):

    defaultstyles = defaultsymbols

    def __init__(self, title="user provided values", **columns):
        for i, values in enumerate(list(columns.values())):
            if i and len(values) != l:
                raise ValueError("different number of values")
            else:
                l = len(values)
        self.columns = columns
        self.columnnames = list(columns.keys())
        self.title = title
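

# Editorial usage sketch (not part of the original source): columns are passed
# as keyword arguments, e.g.
#     d = values(x=[1, 2, 3], y=[1, 4, 9])
# and the resulting instance can be handed to a graph's plot() method.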


class points(_data):
    "Graph data from a list of points"

    defaultstyles = defaultsymbols

    def __init__(self, points, title="user provided points", addlinenumbers=1, **columns):
        if len(points):
            l = len(points[0])
            self.columndata = [[x] for x in points[0]]
            for point in points[1:]:
                if l != len(point):
                    raise ValueError("different number of columns per point")
                for i, x in enumerate(point):
                    self.columndata[i].append(x)
            for v in list(columns.values()):
                if abs(v) > l or (not addlinenumbers and abs(v) == l):
                    raise ValueError("column number bigger than number of columns")
            if addlinenumbers:
                self.columndata = [list(range(1, len(points) + 1))] + self.columndata
            self.columns = dict([(key, self.columndata[i]) for key, i in list(columns.items())])
        else:
            self.columns = dict([(key, []) for key, i in list(columns.items())])
        self.columnnames = list(self.columns.keys())
        self.title = title
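

# Editorial usage sketch (not part of the original source): points takes a list
# of rows plus keyword arguments mapping column names to column numbers (with
# addlinenumbers enabled, column 0 holds the automatically added line numbers
# and the data columns start at 1), e.g.
#     d = points([(1, 1), (2, 4), (3, 9)], x=1, y=2)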


class _notitle:
    pass

_columnintref = re.compile(r"\$(-?\d+)", re.IGNORECASE)


class data(_data):
    "creates a new data set out of an existing data set"

    def __init__(self, data, title=_notitle, context={}, copy=1,
                 replacedollar=1, columncallback="__column__", **columns):
        # build a nice title
        if title is _notitle:
            items = list(columns.items())
            items.sort() # we want sorted items (otherwise they would be unpredictably scrambled)
            self.title = "%s: %s" % (text.escapestring(data.title or "unknown source"),
                                     ", ".join(["%s=%s" % (text.escapestring(key),
                                                           text.escapestring(str(value)))
                                                for key, value in items]))
        else:
            self.title = title

        self.orgdata = data
        self.defaultstyles = self.orgdata.defaultstyles

        # analyse the **columns argument
        self.columns = {}
        for columnname, value in list(columns.items()):
            # search in the columns dictionary
            try:
                self.columns[columnname] = self.orgdata.columns[value]
            except KeyError:
                # search in the columndata list
                try:
                    self.columns[columnname] = self.orgdata.columndata[value]
                except (AttributeError, TypeError):
                    # value was not a valid column identifier,
                    # i.e. take it as a mathematical expression
                    if replacedollar:
                        m = _columnintref.search(value)
                        while m:
                            value = "%s%s(%s)%s" % (value[:m.start()], columncallback, m.groups()[0], value[m.end():])
                            m = _columnintref.search(value)
                        value = value.replace("$", columncallback)
                    expression = compile(value.strip(), __file__, "eval")
                    context = context.copy()
                    context[columncallback] = self.columncallback
                    if self.orgdata.columns:
                        key, columndata = list(self.orgdata.columns.items())[0]
                        count = len(columndata)
                    elif self.orgdata.columndata:
                        count = len(self.orgdata.columndata[0])
                    else:
                        count = 0
                    newdata = []
                    for i in range(count):
                        self.columncallbackcount = i
                        for key, values in list(self.orgdata.columns.items()):
                            context[key] = values[i]
                        try:
                            newdata.append(eval(expression, _mathglobals, context))
                        except (ArithmeticError, ValueError):
                            newdata.append(None)
                    self.columns[columnname] = newdata

        if copy:
            # copy other, non-conflicting column names
            for columnname, columndata in list(self.orgdata.columns.items()):
                if columnname not in self.columns:
                    self.columns[columnname] = columndata

        self.columnnames = list(self.columns.keys())

    def columncallback(self, value):
        try:
            return self.orgdata.columndata[value][self.columncallbackcount]
        except:
            return self.orgdata.columns[value][self.columncallbackcount]
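

# Editorial usage sketch (not part of the original source): new columns can be
# taken over by name or number from the original data or computed from a
# mathematical expression, where $<n> refers to column number <n>, e.g.
#     d2 = data(d, x=1, ysqrt="sqrt($2)")
# Expressions are evaluated with the helper functions defined in _mathglobals.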


filecache = {}


class file(data):

    defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
    defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
    defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")

    def splitline(self, line, stringpattern, columnpattern, tofloat=1):
        """returns a list created out of the string line
        - matches stringpattern and columnpattern, adds the first group of that
          match to the result and removes the matched part until the line is empty
        - when stringpattern matched, the result is always kept as a string
        - when columnpattern matched and tofloat is true, a conversion to a float
          is tried; when this conversion fails, the string is kept"""
        result = []
        # try to gain speed by skipping the regular expression matching where possible
        if line.find('"')!=-1 or \
           stringpattern is not self.defaultstringpattern or \
           columnpattern is not self.defaultcolumnpattern:
            while len(line):
                match = stringpattern.match(line)
                if match:
                    result.append(match.groups()[0])
                    line = line[match.end():]
                else:
                    match = columnpattern.match(line)
                    if tofloat:
                        try:
                            result.append(float(match.groups()[0]))
                        except (TypeError, ValueError):
                            result.append(match.groups()[0])
                    else:
                        result.append(match.groups()[0])
                    line = line[match.end():]
        else:
            if tofloat:
                try:
                    return list(map(float, line.split()))
                except (TypeError, ValueError):
                    result = []
                    for r in line.split():
                        try:
                            result.append(float(r))
                        except (TypeError, ValueError):
                            result.append(r)
            else:
                return line.split()
        return result

    def getcachekey(self, *args):
        return ":".join([str(x) for x in args])

    def __init__(self, filename,
                 commentpattern=defaultcommentpattern,
                 stringpattern=defaultstringpattern,
                 columnpattern=defaultcolumnpattern,
                 skiphead=0, skiptail=0, every=1,
                 **kwargs):

        def readfile(file, title, self=self, commentpattern=commentpattern, stringpattern=stringpattern, columnpattern=columnpattern, skiphead=skiphead, skiptail=skiptail, every=every):
            columns = []
            columndata = []
            linenumber = 0
            maxcolumns = 0
            for line in file.readlines():
                line = line.strip()
                match = commentpattern.match(line)
                if match:
                    if not len(columndata):
                        columns = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
                else:
                    linedata = []
                    for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
                        linedata.append(value)
                    if len(linedata):
                        if linenumber >= skiphead and not ((linenumber - skiphead) % every):
                            linedata = [linenumber + 1] + linedata
                            if len(linedata) > maxcolumns:
                                maxcolumns = len(linedata)
                            columndata.append(linedata)
                        linenumber += 1
            if skiptail >= every:
                skip, x = divmod(skiptail, every)
                del columndata[-skip:]
            for i in range(len(columndata)):
                if len(columndata[i]) != maxcolumns:
                    columndata[i].extend([None]*(maxcolumns-len(columndata[i])))
            return points(columndata, title=title, addlinenumbers=0,
                          **dict([(column, i+1) for i, column in enumerate(columns[:maxcolumns-1])]))

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
            if cachekey not in filecache:
                filecache[cachekey] = readfile(open(filename), filename)
            data.__init__(self, filecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
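

# Editorial usage sketch (not part of the original source): for a whitespace
# separated data file whose first comment line optionally names the columns
# (e.g. "# x y"), columns can be selected by number or by those names:
#     d = file("input.dat", x=1, y=2)
# Results for files opened by name are cached in filecache, keyed by the
# filename and the parsing options.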


conffilecache = {}


class conffile(data):

    def __init__(self, filename, **kwargs):
        """read data from a config-like file
        - filename is a string
        - each row is defined by a section in the config-like file (see the
          config module description)
        - the columns for each row are defined by the option lines within the
          section; the option names identify and name the columns
        - further keyword arguments are passed to the constructor of data,
          keyword arguments data and title excluded"""

        def readfile(file, title):
            config = configparser.ConfigParser(strict=False)
            config.optionxform = str
            config.read_file(file)
            sections = config.sections()
            sections.sort()
            columndata = [None]*len(sections)
            maxcolumns = 1
            columns = {}
            for i in range(len(sections)):
                point = [sections[i]] + [None]*(maxcolumns-1)
                for option in config.options(sections[i]):
                    value = config.get(sections[i], option)
                    try:
                        value = float(value)
                    except:
                        pass
                    try:
                        index = columns[option]
                    except KeyError:
                        columns[option] = maxcolumns
                        point.append(value)
                        maxcolumns += 1
                    else:
                        point[index] = value
                columndata[i] = point
            # wrap result into a data instance to remove column numbers
            result = data(points(columndata, addlinenumbers=0, **columns), title=title)
            # ... but reinsert sections as linenumbers
            result.columndata = [[x[0] for x in columndata]]
            return result

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            if filename not in filecache:
                filecache[filename] = readfile(open(filename), filename)
            data.__init__(self, filecache[filename], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
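

# Editorial usage sketch (not part of the original source): a config-like file
# such as
#     [point1]
#     x = 1
#     y = 1
#     [point2]
#     x = 2
#     y = 4
# yields one row per section; the option names become the column names, so
#     d = conffile("input.cfg")
# provides columns "x" and "y", while the section names identify the rows.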


cbdfilecache = {}


class cbdfile(data):

    defaultstyles = defaultlines

    def getcachekey(self, *args):
        return ":".join([str(x) for x in args])

    def __init__(self, filename, minrank=None, maxrank=None, **kwargs):

        class cbdhead:

            def __init__(self, file):
                (self.magic,
                 self.dictaddr,
                 self.segcount,
                 self.segsize,
                 self.segmax,
                 self.fill) = struct.unpack("<5i20s", file.read(40))
                if self.magic != 0x20770002:
                    raise ValueError("bad magic number")

        class segdict:

            def __init__(self, file, i):
                self.index = i
                (self.segid,
                 self.maxlat,
                 self.minlat,
                 self.maxlong,
                 self.minlong,
                 self.absaddr,
                 self.nbytes,
                 self.rank) = struct.unpack("<6i2h", file.read(28))

        class segment:

            def __init__(self, file, sd):
                file.seek(sd.absaddr)
                (self.orgx,
                 self.orgy,
                 self.id,
                 self.nstrokes,
                 self.dummy) = struct.unpack("<3i2h", file.read(16))
                oln, olt = self.orgx, self.orgy
                self.points = [(olt, oln)]
                for i in range(self.nstrokes):
                    c1, c2 = struct.unpack("2c", file.read(2))
                    if ord(c2) & 0x40:
                        if c1 > "\177":
                            dy = ord(c1) - 256
                        else:
                            dy = ord(c1)
                        if c2 > "\177":
                            dx = ord(c2) - 256
                        else:
                            dx = ord(c2) - 64
                    else:
                        c3, c4, c5, c6, c7, c8 = struct.unpack("6c", file.read(6))
                        if c2 > "\177":
                            c2 = chr(ord(c2) | 0x40)
                        dx, dy = struct.unpack("<2i", c3+c4+c1+c2+c7+c8+c5+c6)
                    oln += dx
                    olt += dy
                    self.points.append((olt, oln))
                sd.nstrokes = self.nstrokes

        def readfile(file, title):
            h = cbdhead(file)
            file.seek(h.dictaddr)
            sds = [segdict(file, i+1) for i in range(h.segcount)]
            sbs = [segment(file, sd) for sd in sds]

            # remove jumps at long +/- 180
            for sd, sb in zip(sds, sbs):
                if sd.minlong < -150*3600 and sd.maxlong > 150*3600:
                    for i, (lat, int) in enumerate(sb.points):
                        if int < 0:
                            sb.points[i] = lat, int + 360*3600

            columndata = []
            for sd, sb in zip(sds, sbs):
                if ((minrank is None or sd.rank >= minrank) and
                    (maxrank is None or sd.rank <= maxrank)):
                    if columndata:
                        columndata.append((None, None))
                    columndata.extend([(int/3600.0, lat/3600.0)
                                       for lat, int in sb.points])

            result = points(columndata, title=title)
            result.defaultstyles = self.defaultstyles
            return result

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, minrank, maxrank)
            if cachekey not in cbdfilecache:
                cbdfilecache[cachekey] = readfile(open(filename, "rb"), filename)
            data.__init__(self, cbdfilecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
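

# Editorial note (not part of the original source): cbdfile decodes a binary
# map file (header, segment dictionary and stroke-encoded segments as unpacked
# above), converts the stored coordinates from seconds of arc to degrees and
# separates segments by (None, None) points; segments can be filtered by rank:
#     d = cbdfile("coastlines.cbd", maxrank=1)   # hypothetical file name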


class function(_data):

    defaultstyles = defaultlines

    assignmentpattern = re.compile(r"\s*([a-z_][a-z0-9_]*)\s*\(\s*([a-z_][a-z0-9_]*)\s*\)\s*=", re.IGNORECASE)

    def __init__(self, expression, title=_notitle, min=None, max=None,
                 points=100, context={}):

        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        self.min = min
        self.max = max
        self.numberofpoints = points
        self.context = context.copy() # be safe on late evaluations
        m = self.assignmentpattern.match(expression)
        if m:
            self.yname, self.xname = m.groups()
            expression = expression[m.end():]
        else:
            raise ValueError("y(x)=... or similar expected")
        if self.xname in context:
            raise ValueError("xname in context")
        self.expression = compile(expression.strip(), __file__, "eval")
        self.columns = {}
        self.columnnames = [self.xname, self.yname]

    def dynamiccolumns(self, graph, axisnames):
        dynamiccolumns = {self.xname: [], self.yname: []}

        xaxis = graph.axes[axisnames.get(self.xname, self.xname)]
        from pyx.graph.axis import logarithmic
        logaxis = isinstance(xaxis.axis, logarithmic)
        if self.min is not None:
            min = self.min
        else:
            min = xaxis.data.min
        if self.max is not None:
            max = self.max
        else:
            max = xaxis.data.max
        if logaxis:
            min = math.log(min)
            max = math.log(max)
        for i in range(self.numberofpoints):
            x = min + (max-min)*i / (self.numberofpoints-1.0)
            if logaxis:
                x = math.exp(x)
            dynamiccolumns[self.xname].append(x)
            self.context[self.xname] = x
            try:
                y = eval(self.expression, _mathglobals, self.context)
            except (ArithmeticError, ValueError):
                y = None
            dynamiccolumns[self.yname].append(y)
        return dynamiccolumns
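

# Editorial usage sketch (not part of the original source): the expression
# names both the dependent and the independent variable; min and max default
# to the range of the corresponding graph axis, e.g.
#     d = function("y(x)=sin(x)/x", min=1, max=10)
# On a logarithmic x axis the sample points are spaced logarithmically.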


class functionxy(function):

    def __init__(self, f, min=None, max=None, **kwargs):
        function.__init__(self, "y(x)=f(x)", context={"f": f}, min=min, max=max, **kwargs)


class paramfunction(_data):

    defaultstyles = defaultlines

    def __init__(self, varname, min, max, expression, title=_notitle, points=100, context={}):
        if varname in context:
            raise ValueError("varname in context")
        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        varlist, expression = expression.split("=")
        expression = compile(expression.strip(), __file__, "eval")
        keys = [key.strip() for key in varlist.split(",")]
        self.columns = dict([(key, []) for key in keys])
        context = context.copy()
        for i in range(points):
            param = min + (max-min)*i / (points-1.0)
            context[varname] = param
            values = eval(expression, _mathglobals, context)
            for key, value in zip(keys, values):
                self.columns[key].append(value)
            if len(keys) != len(values):
                raise ValueError("unpack tuple of wrong size")
        self.columnnames = list(self.columns.keys())
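

# Editorial usage sketch (not part of the original source): the expression
# assigns a tuple of column values for each value of the parameter, e.g.
#     d = paramfunction("k", 0, 2*math.pi, "x, y = cos(k), sin(k)", points=200)
# traces the unit circle in the columns x and y.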


class paramfunctionxy(paramfunction):

    def __init__(self, f, min, max, **kwargs):
        paramfunction.__init__(self, "t", min, max, "x, y = f(t)", context={"f": f}, **kwargs)


class _nodefaultstyles:
    pass


class join(_data):
    """creates a new data set by joining a list of data; it does, however, *not*
    combine points, but pads the data with None where necessary"""

    def merge_lists(self, lists):
        "merges list items without duplications, the resulting order is arbitrary"
        result = set()
        for l in lists:
            result.update(set(l))
        return builtinlist(result)

    def merge_dicts(self, dicts):
        """merge dicts containing lists as values (with an equal number of items
        per list in each dict), missing data is padded by None"""
        keys = self.merge_lists([list(d.keys()) for d in dicts])
        empties = []
        for d in dicts:
            if len(list(d.keys())) == len(keys):
                empties.append(None) # won't be needed later on
            else:
                values = list(d.values())
                if len(values):
                    empties.append([None]*len(values[0]))
                else:
                    # has no data at all -> do not add anything
                    empties.append([])
        result = {}
        for key in keys:
            result[key] = []
            for d, e in zip(dicts, empties):
                result[key].extend(d.get(key, e))
        return result

    def __init__(self, data, title=_notitle, defaultstyles=_nodefaultstyles):
        """takes a list of data, a title (if it should not be autoconstructed),
        and a defaultstyles list if there is no common defaultstyles setting
        in the provided data"""
        assert len(data)
        self.data = data
        self.columnnames = self.merge_lists([d.columnnames for d in data])
        self.columns = self.merge_dicts([d.columns for d in data])
        if title is _notitle:
            self.title = " + ".join([d.title for d in data])
        else:
            self.title = title
        if defaultstyles is _nodefaultstyles:
            self.defaultstyles = data[0].defaultstyles
            for d in data[1:]:
                if d.defaultstyles is not self.defaultstyles:
                    self.defaultstyles = None
                    break
        else:
            self.defaultstyles = defaultstyles

    def dynamiccolumns(self, graph, axisnames):
        return self.merge_dicts([d.dynamiccolumns(graph, axisnames) for d in self.data])
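

# Editorial usage sketch (not part of the original source): join concatenates
# the rows of several data instances, padding missing columns with None, e.g.
#     d = join([file("a.dat", x=1, y=2), file("b.dat", x=1, y=2)])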