use with statement for file operations (where appropriate)
[PyX.git] / pyx / graph / data.py
blob 05e0516c6158b6293b712ba02a78ebd6ba8c10c4

# -*- encoding: utf-8 -*-
#
#
# Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
# Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
# Copyright (C) 2002-2012 André Wobst <wobsta@users.sourceforge.net>
#
# This file is part of PyX (http://pyx.sourceforge.net/).
#
# PyX is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# PyX is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PyX; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

import math, re, configparser, struct, warnings
from pyx import text
from . import style

builtinlist = list

def splitatvalue(value, *splitpoints):
    section = 0
    while section < len(splitpoints) and splitpoints[section] < value:
        section += 1
    if len(splitpoints) > 1:
        if section % 2:
            section = None
        else:
            section >>= 1
    return (section, value)
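
# Illustration (editorial note, not part of the module): with a single
# splitpoint the function assigns a value to section 0 or 1; with a pair of
# splitpoints the region between them is dropped (section None):
#
#     splitatvalue(0.5, 1)     # -> (0, 0.5)
#     splitatvalue(2.0, 1)     # -> (1, 2.0)
#     splitatvalue(1.5, 1, 2)  # -> (None, 1.5)
#     splitatvalue(3.0, 1, 2)  # -> (1, 3.0)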

_mathglobals = {"neg": lambda x: -x,
                "abs": lambda x: x < 0 and -x or x,
                "sgn": lambda x: x < 0 and -1 or 1,
                "sqrt": math.sqrt,
                "exp": math.exp,
                "log": math.log,
                "sin": math.sin,
                "cos": math.cos,
                "tan": math.tan,
                "asin": math.asin,
                "acos": math.acos,
                "atan": math.atan,
                "sind": lambda x: math.sin(math.pi/180*x),
                "cosd": lambda x: math.cos(math.pi/180*x),
                "tand": lambda x: math.tan(math.pi/180*x),
                "asind": lambda x: 180/math.pi*math.asin(x),
                "acosd": lambda x: 180/math.pi*math.acos(x),
                "atand": lambda x: 180/math.pi*math.atan(x),
                "norm": lambda x, y: math.hypot(x, y),
                "splitatvalue": splitatvalue,
                "pi": math.pi,
                "e": math.e}


class _data:
    """graph data interface

    Graph data consists of columns, where each column might be identified by a
    string or an integer. Each row in the resulting table refers to a data
    point.

    All methods except for the constructor should consider self and its
    attributes to be readonly, since the data instance might be shared between
    several graphs simultaneously.

    The instance variable columns is a dictionary mapping column names to the
    data of the column (i.e. to a list). Only static columns (known at
    construction time) are contained in that dictionary. For data with numbered
    columns the column data is also available via the list columndata.
    Otherwise the columndata list should be missing and an access to a column
    number will fail.

    The names of all columns (static and dynamic) must be fixed by the
    constructor and stated in the columnnames list.

    The instance variables title and defaultstyles contain the data title and
    the default styles (a list of styles), respectively. If defaultstyles is None,
    the data cannot be plotted without user provided styles.
    """

    def dynamiccolumns(self, graph, axisnames):
        """create and return dynamic columns data

        Returns dynamic data matching the given axes (the axes range and other
        data might be used). The return value is a dictionary similar to the
        columns instance variable. However, the static and dynamic data does
        not need to be correlated in any way, i.e. the number of data points in
        self.columns might differ from the number of data points represented by
        the return value of the dynamiccolumns method.
        """
        return {}


defaultsymbols = [style.symbol()]
defaultlines = [style.line()]
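
# Editorial sketch (not part of PyX): a minimal custom static data source
# following the interface documented in _data above; the name MyConstantData
# is made up for illustration only.
#
#     class MyConstantData(_data):
#         defaultstyles = defaultsymbols
#         def __init__(self):
#             self.columns = {"x": [0, 1, 2], "y": [1, 1, 1]}
#             self.columnnames = ["x", "y"]
#             self.title = "constant example"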


class values(_data):

    defaultstyles = defaultsymbols

    def __init__(self, title="user provided values", **columns):
        for i, values in enumerate(list(columns.values())):
            if i and len(values) != l:
                raise ValueError("different number of values")
            else:
                l = len(values)
        self.columns = columns
        self.columnnames = list(columns.keys())
        self.title = title
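
# Usage sketch (editorial, hedged): plotting user provided values with the
# standard PyX graph API; graph.graphxy and g.plot are regular PyX calls.
#
#     from pyx import graph
#     g = graph.graphxy(width=8)
#     g.plot(graph.data.values(x=[1, 2, 3], y=[2, 4, 8]))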


class points(_data):
    "Graph data from a list of points"

    defaultstyles = defaultsymbols

    def __init__(self, points, title="user provided points", addlinenumbers=1, **columns):
        if len(points):
            l = len(points[0])
            self.columndata = [[x] for x in points[0]]
            for point in points[1:]:
                if l != len(point):
                    raise ValueError("different number of columns per point")
                for i, x in enumerate(point):
                    self.columndata[i].append(x)
            for v in list(columns.values()):
                if abs(v) > l or (not addlinenumbers and abs(v) == l):
                    raise ValueError("column number bigger than number of columns")
            if addlinenumbers:
                self.columndata = [list(range(1, len(points) + 1))] + self.columndata
            self.columns = dict([(key, self.columndata[i]) for key, i in list(columns.items())])
        else:
            self.columns = dict([(key, []) for key, i in list(columns.items())])
        self.columnnames = list(self.columns.keys())
        self.title = title
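
# Usage sketch (editorial, hedged): the column keyword arguments select columns
# by number; with the default addlinenumbers=1, column 0 holds the line numbers,
# so the first data column is 1.
#
#     d = points([(1, 0.3), (2, 0.7), (3, 0.4)], x=1, y=2)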


class _notitle:
    pass

_columnintref = re.compile(r"\$(-?\d+)", re.IGNORECASE)


class data(_data):
    "creates a new data set out of an existing data set"

    def __init__(self, data, title=_notitle, context={}, copy=1,
                 replacedollar=1, columncallback="__column__", **columns):
        # build a nice title
        if title is _notitle:
            items = list(columns.items())
            items.sort() # we want sorted items (otherwise they would be unpredictably scrambled)
            self.title = "%s: %s" % (text.escapestring(data.title or "unknown source"),
                                     ", ".join(["%s=%s" % (text.escapestring(key),
                                                           text.escapestring(str(value)))
                                                for key, value in items]))
        else:
            self.title = title

        self.orgdata = data
        self.defaultstyles = self.orgdata.defaultstyles

        # analyse the **columns argument
        self.columns = {}
        for columnname, value in list(columns.items()):
            # search in the columns dictionary
            try:
                self.columns[columnname] = self.orgdata.columns[value]
            except KeyError:
                # search in the columndata list
                try:
                    self.columns[columnname] = self.orgdata.columndata[value]
                except (AttributeError, TypeError):
                    # value was not a valid column identifier,
                    # i.e. take it as a mathematical expression
                    if replacedollar:
                        m = _columnintref.search(value)
                        while m:
                            value = "%s%s(%s)%s" % (value[:m.start()], columncallback, m.groups()[0], value[m.end():])
                            m = _columnintref.search(value)
                        value = value.replace("$", columncallback)
                    expression = compile(value.strip(), __file__, "eval")
                    context = context.copy()
                    context[columncallback] = self.columncallback
                    if self.orgdata.columns:
                        key, columndata = list(self.orgdata.columns.items())[0]
                        count = len(columndata)
                    elif self.orgdata.columndata:
                        count = len(self.orgdata.columndata[0])
                    else:
                        count = 0
                    newdata = []
                    for i in range(count):
                        self.columncallbackcount = i
                        for key, values in list(self.orgdata.columns.items()):
                            context[key] = values[i]
                        try:
                            newdata.append(eval(expression, _mathglobals, context))
                        except (ArithmeticError, ValueError):
                            newdata.append(None)
                    self.columns[columnname] = newdata

        if copy:
            # copy other, non-conflicting column names
            for columnname, columndata in list(self.orgdata.columns.items()):
                if columnname not in self.columns:
                    self.columns[columnname] = columndata

        self.columnnames = list(self.columns.keys())

    def columncallback(self, value):
        try:
            return self.orgdata.columndata[value][self.columncallbackcount]
        except:
            return self.orgdata.columns[value][self.columncallbackcount]
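
# Usage sketch (editorial, hedged): deriving new columns from an existing data
# instance d (assumed to provide columns "x" and "y"); expressions are
# evaluated with _mathglobals, and "$n" refers to column n when the source data
# has numbered columns.
#
#     d2 = data(d, xscaled="2*x", logy="log(y)")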


filecache = {}

class file(data):

    defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
    defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
    defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")

    def splitline(self, line, stringpattern, columnpattern, tofloat=1):
        """returns a list created out of the string line
        - matches stringpattern and columnpattern, adds the first group of that
          match to the result and removes those matches until the line is empty
        - when stringpattern matched, the result is always kept as a string
        - when columnpattern matched and tofloat is true, a conversion to a float
          is tried; when this conversion fails, the string is kept"""
        result = []
        # try to gain speed by skipping the regular expression matching
        if line.find('"')!=-1 or \
           stringpattern is not self.defaultstringpattern or \
           columnpattern is not self.defaultcolumnpattern:
            while len(line):
                match = stringpattern.match(line)
                if match:
                    result.append(match.groups()[0])
                    line = line[match.end():]
                else:
                    match = columnpattern.match(line)
                    if tofloat:
                        try:
                            result.append(float(match.groups()[0]))
                        except (TypeError, ValueError):
                            result.append(match.groups()[0])
                    else:
                        result.append(match.groups()[0])
                    line = line[match.end():]
        else:
            if tofloat:
                try:
                    return list(map(float, line.split()))
                except (TypeError, ValueError):
                    result = []
                    for r in line.split():
                        try:
                            result.append(float(r))
                        except (TypeError, ValueError):
                            result.append(r)
            else:
                return line.split()
        return result

    def getcachekey(self, *args):
        return ":".join([str(x) for x in args])

    def __init__(self, filename,
                 commentpattern=defaultcommentpattern,
                 stringpattern=defaultstringpattern,
                 columnpattern=defaultcolumnpattern,
                 skiphead=0, skiptail=0, every=1,
                 **kwargs):

        def readfile(file, title, self=self, commentpattern=commentpattern, stringpattern=stringpattern, columnpattern=columnpattern, skiphead=skiphead, skiptail=skiptail, every=every):
            columns = []
            columndata = []
            linenumber = 0
            maxcolumns = 0
            for line in file.readlines():
                line = line.strip()
                match = commentpattern.match(line)
                if match:
                    if not len(columndata):
                        columns = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
                else:
                    linedata = []
                    for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
                        linedata.append(value)
                    if len(linedata):
                        if linenumber >= skiphead and not ((linenumber - skiphead) % every):
                            linedata = [linenumber + 1] + linedata
                            if len(linedata) > maxcolumns:
                                maxcolumns = len(linedata)
                            columndata.append(linedata)
                        linenumber += 1
            if skiptail >= every:
                skip, x = divmod(skiptail, every)
                del columndata[-skip:]
            for i in range(len(columndata)):
                if len(columndata[i]) != maxcolumns:
                    columndata[i].extend([None]*(maxcolumns-len(columndata[i])))
            return points(columndata, title=title, addlinenumbers=0,
                          **dict([(column, i+1) for i, column in enumerate(columns[:maxcolumns-1])]))

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
            if cachekey not in filecache:
                with open(filename) as f:
                    filecache[cachekey] = readfile(f, filename)
            data.__init__(self, filecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
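
# Usage sketch (editorial, hedged): reading whitespace separated columns from a
# text file ("measurement.dat" is a placeholder name). Columns can be addressed
# by number (column 0 is the line number, data columns start at 1) or by the
# names given in a leading comment line of the file.
#
#     d = file("measurement.dat", x=1, y=2)
#     d = file("measurement.dat", x="time", y="temperature")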


conffilecache = {}

class conffile(data):

    def __init__(self, filename, **kwargs):
        """read data from a config-like file
        - filename is a string
        - each row is defined by a section in the config-like file (see
          config module description)
        - the columns for each row are defined by lines in the section file;
          the option entries identify and name the columns
        - further keyword arguments are passed to the constructor of data,
          keyword arguments data and titles excluded"""

        def readfile(file, title):
            config = configparser.ConfigParser(strict=False)
            config.optionxform = str
            config.read_file(file)
            sections = config.sections()
            sections.sort()
            columndata = [None]*len(sections)
            maxcolumns = 1
            columns = {}
            for i in range(len(sections)):
                point = [sections[i]] + [None]*(maxcolumns-1)
                for option in config.options(sections[i]):
                    value = config.get(sections[i], option)
                    try:
                        value = float(value)
                    except:
                        pass
                    try:
                        index = columns[option]
                    except KeyError:
                        columns[option] = maxcolumns
                        point.append(value)
                        maxcolumns += 1
                    else:
                        point[index] = value
                columndata[i] = point
            # wrap result into a data instance to remove column numbers
            result = data(points(columndata, addlinenumbers=0, **columns), title=title)
            # ... but reinsert sections as linenumbers
            result.columndata = [[x[0] for x in columndata]]
            return result

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            if filename not in conffilecache:
                with open(filename) as f:
                    conffilecache[filename] = readfile(f, filename)
            data.__init__(self, conffilecache[filename], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
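
# Editorial sketch (hedged) of the config-like input read above: each section
# becomes one row and each option one named column ("sample.cfg", "mass" and
# "charge" are placeholder names).
#
#     [point1]
#     mass = 1.0
#     charge = -1
#
#     [point2]
#     mass = 2.0
#     charge = 1
#
#     d = conffile("sample.cfg", m="mass", q="charge")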


cbdfilecache = {}

class cbdfile(data):

    defaultstyles = defaultlines

    def getcachekey(self, *args):
        return ":".join([str(x) for x in args])

    def __init__(self, filename, minrank=None, maxrank=None, **kwargs):

        class cbdhead:

            def __init__(self, file):
                (self.magic,
                 self.dictaddr,
                 self.segcount,
                 self.segsize,
                 self.segmax,
                 self.fill) = struct.unpack("<5i20s", file.read(40))
                if self.magic != 0x20770002:
                    raise ValueError("bad magic number")

        class segdict:

            def __init__(self, file, i):
                self.index = i
                (self.segid,
                 self.maxlat,
                 self.minlat,
                 self.maxlong,
                 self.minlong,
                 self.absaddr,
                 self.nbytes,
                 self.rank) = struct.unpack("<6i2h", file.read(28))

        class segment:

            def __init__(self, file, sd):
                file.seek(sd.absaddr)
                (self.orgx,
                 self.orgy,
                 self.id,
                 self.nstrokes,
                 self.dummy) = struct.unpack("<3i2h", file.read(16))
                oln, olt = self.orgx, self.orgy
                self.points = [(olt, oln)]
                for i in range(self.nstrokes):
                    c1, c2 = struct.unpack("2c", file.read(2))
                    if ord(c2) & 0x40:
                        if c1 > b"\177":
                            dy = ord(c1) - 256
                        else:
                            dy = ord(c1)
                        if c2 > b"\177":
                            dx = ord(c2) - 256
                        else:
                            dx = ord(c2) - 64
                    else:
                        c3, c4, c5, c6, c7, c8 = struct.unpack("6c", file.read(6))
                        if c2 > b"\177":
                            c2 = bytes([ord(c2) | 0x40])
                        dx, dy = struct.unpack("<2i", c3+c4+c1+c2+c7+c8+c5+c6)
                    oln += dx
                    olt += dy
                    self.points.append((olt, oln))
                sd.nstrokes = self.nstrokes

        def readfile(file, title):
            h = cbdhead(file)
            file.seek(h.dictaddr)
            sds = [segdict(file, i+1) for i in range(h.segcount)]
            sbs = [segment(file, sd) for sd in sds]

            # remove jumps at long +/- 180
            for sd, sb in zip(sds, sbs):
                if sd.minlong < -150*3600 and sd.maxlong > 150*3600:
                    for i, (lat, lng) in enumerate(sb.points):
                        if lng < 0:
                            sb.points[i] = lat, lng + 360*3600

            columndata = []
            for sd, sb in zip(sds, sbs):
                if ((minrank is None or sd.rank >= minrank) and
                    (maxrank is None or sd.rank <= maxrank)):
                    if columndata:
                        columndata.append((None, None))
                    columndata.extend([(lng/3600.0, lat/3600.0)
                                       for lat, lng in sb.points])

            result = points(columndata, title=title)
            result.defaultstyles = self.defaultstyles
            return result

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, minrank, maxrank)
            if cachekey not in cbdfilecache:
                with open(filename, "rb") as f:
                    cbdfilecache[cachekey] = readfile(f, filename)
            data.__init__(self, cbdfilecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
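
# Usage sketch (editorial, hedged): reading map segments from a CBD binary file
# ("coastline.cbd" is a placeholder name); minrank and maxrank filter segments
# by the rank stored in the segment dictionary.
#
#     d = cbdfile("coastline.cbd", maxrank=4)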


class function(_data):

    defaultstyles = defaultlines

    assignmentpattern = re.compile(r"\s*([a-z_][a-z0-9_]*)\s*\(\s*([a-z_][a-z0-9_]*)\s*\)\s*=", re.IGNORECASE)

    def __init__(self, expression, title=_notitle, min=None, max=None,
                 points=100, context={}):

        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        self.min = min
        self.max = max
        self.numberofpoints = points
        self.context = context.copy() # be safe on late evaluations
        m = self.assignmentpattern.match(expression)
        if m:
            self.yname, self.xname = m.groups()
            expression = expression[m.end():]
        else:
            raise ValueError("y(x)=... or similar expected")
        if self.xname in context:
            raise ValueError("xname in context")
        self.expression = compile(expression.strip(), __file__, "eval")
        self.columns = {}
        self.columnnames = [self.xname, self.yname]

    def dynamiccolumns(self, graph, axisnames):
        dynamiccolumns = {self.xname: [], self.yname: []}

        xaxis = graph.axes[axisnames.get(self.xname, self.xname)]
        from pyx.graph.axis import logarithmic
        logaxis = isinstance(xaxis.axis, logarithmic)
        if self.min is not None:
            min = self.min
        else:
            min = xaxis.data.min
        if self.max is not None:
            max = self.max
        else:
            max = xaxis.data.max
        if logaxis:
            min = math.log(min)
            max = math.log(max)
        for i in range(self.numberofpoints):
            x = min + (max-min)*i / (self.numberofpoints-1.0)
            if logaxis:
                x = math.exp(x)
            dynamiccolumns[self.xname].append(x)
            self.context[self.xname] = x
            try:
                y = eval(self.expression, _mathglobals, self.context)
            except (ArithmeticError, ValueError):
                y = None
            dynamiccolumns[self.yname].append(y)
        return dynamiccolumns
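
# Usage sketch (editorial, hedged): the expression names the dependent and the
# independent variable; the x range is taken from the axis unless min/max are
# given explicitly.
#
#     d = function("y(x)=sin(x)/x", min=0.1, max=10)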


class functionxy(function):

    def __init__(self, f, min=None, max=None, **kwargs):
        function.__init__(self, "y(x)=f(x)", context={"f": f}, min=min, max=max, **kwargs)


class paramfunction(_data):

    defaultstyles = defaultlines

    def __init__(self, varname, min, max, expression, title=_notitle, points=100, context={}):
        if varname in context:
            raise ValueError("varname in context")
        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        varlist, expression = expression.split("=")
        expression = compile(expression.strip(), __file__, "eval")
        keys = [key.strip() for key in varlist.split(",")]
        self.columns = dict([(key, []) for key in keys])
        context = context.copy()
        for i in range(points):
            param = min + (max-min)*i / (points-1.0)
            context[varname] = param
            values = eval(expression, _mathglobals, context)
            for key, value in zip(keys, values):
                self.columns[key].append(value)
            if len(keys) != len(values):
                raise ValueError("unpack tuple of wrong size")
        self.columnnames = list(self.columns.keys())
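
# Usage sketch (editorial, hedged): a circle as a parametric curve; the names
# on the left-hand side of the expression become the column names.
#
#     d = paramfunction("t", 0, 2*math.pi, "x, y = cos(t), sin(t)")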


class paramfunctionxy(paramfunction):

    def __init__(self, f, min, max, **kwargs):
        paramfunction.__init__(self, "t", min, max, "x, y = f(t)", context={"f": f}, **kwargs)


class _nodefaultstyles:
    pass


class join(_data):
    "creates a new data set by joining a list of data; it does, however, *not* combine points, but fills data with None if necessary"

    def merge_lists(self, lists):
        "merges list items w/o duplications, resulting order is arbitrary"
        result = set()
        for l in lists:
            result.update(set(l))
        return builtinlist(result)

    def merge_dicts(self, dicts):
        """merge dicts containing lists as values (with equal number of items
        per list in each dict), missing data is padded by None"""
        keys = self.merge_lists([list(d.keys()) for d in dicts])
        empties = []
        for d in dicts:
            if len(list(d.keys())) == len(keys):
                empties.append(None) # won't be needed later on
            else:
                values = list(d.values())
                if len(values):
                    empties.append([None]*len(values[0]))
                else:
                    # has no data at all -> do not add anything
                    empties.append([])
        result = {}
        for key in keys:
            result[key] = []
            for d, e in zip(dicts, empties):
                result[key].extend(d.get(key, e))
        return result

    def __init__(self, data, title=_notitle, defaultstyles=_nodefaultstyles):
        """takes a list of data, a title (if it should not be autoconstructed)
        and a defaultstyles list if there is no common defaultstyles setting
        in the provided data"""
        assert len(data)
        self.data = data
        self.columnnames = self.merge_lists([d.columnnames for d in data])
        self.columns = self.merge_dicts([d.columns for d in data])
        if title is _notitle:
            self.title = " + ".join([d.title for d in data])
        else:
            self.title = title
        if defaultstyles is _nodefaultstyles:
            self.defaultstyles = data[0].defaultstyles
            for d in data[1:]:
                if d.defaultstyles is not self.defaultstyles:
                    self.defaultstyles = None
                    break
        else:
            self.defaultstyles = defaultstyles

    def dynamiccolumns(self, graph, axisnames):
        return self.merge_dicts([d.dynamiccolumns(graph, axisnames) for d in self.data])
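
# Usage sketch (editorial, hedged): joining two data sets with partially
# different columns; missing values are padded with None as in merge_dicts.
#
#     d = join([values(x=[1, 2], y=[1, 4]),
#               values(x=[3, 4], z=[9, 16])])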