restore old behaviour: config parser was not in strict mode
[PyX.git] / pyx / graph / data.py

# -*- encoding: utf-8 -*-
#
#
# Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
# Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
# Copyright (C) 2002-2012 André Wobst <wobsta@users.sourceforge.net>
#
# This file is part of PyX (http://pyx.sourceforge.net/).
#
# PyX is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# PyX is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PyX; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

import math, re, configparser, struct, warnings
from pyx import text
from . import style

builtinlist = list


def splitatvalue(value, *splitpoints):
    section = 0
    while section < len(splitpoints) and splitpoints[section] < value:
        section += 1
    if len(splitpoints) > 1:
        if section % 2:
            section = None
        else:
            section >>= 1
    return (section, value)
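

# Editorial note (not part of the original source): splitatvalue is exposed to
# column expressions via _mathglobals below and splits data into regions at the
# given split points.  With two split points it returns (0, value) below the
# first point, (None, value) between the two, and (1, value) above the second,
# e.g. splitatvalue(1, 2, 8) == (0, 1) and splitatvalue(5, 2, 8) == (None, 5).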


_mathglobals = {"neg": lambda x: -x,
                "abs": lambda x: x < 0 and -x or x,
                "sgn": lambda x: x < 0 and -1 or 1,
                "sqrt": math.sqrt,
                "exp": math.exp,
                "log": math.log,
                "sin": math.sin,
                "cos": math.cos,
                "tan": math.tan,
                "asin": math.asin,
                "acos": math.acos,
                "atan": math.atan,
                "sind": lambda x: math.sin(math.pi/180*x),
                "cosd": lambda x: math.cos(math.pi/180*x),
                "tand": lambda x: math.tan(math.pi/180*x),
                "asind": lambda x: 180/math.pi*math.asin(x),
                "acosd": lambda x: 180/math.pi*math.acos(x),
                "atand": lambda x: 180/math.pi*math.atan(x),
                "norm": lambda x, y: math.hypot(x, y),
                "splitatvalue": splitatvalue,
                "pi": math.pi,
                "e": math.e}


class _data:
    """graph data interface

    Graph data consists of columns, where each column might be identified by a
    string or an integer. Each row in the resulting table refers to a data
    point.

    All methods except for the constructor should consider self and its
    attributes to be readonly, since the data instance might be shared between
    several graphs simultaneously.

    The instance variable columns is a dictionary mapping column names to the
    data of the column (i.e. to a list). Only static columns (known at
    construction time) are contained in that dictionary. For data with numbered
    columns the column data is also available via the list columndata.
    Otherwise the columndata list should be missing, and an access by column
    number will fail.

    The names of all columns (static and dynamic) must be fixed by the
    constructor and stated in the columnnames list.

    The instance variables title and defaultstyles contain the data title and
    the default styles (a list of styles), respectively. If defaultstyles is None,
    the data cannot be plotted without user provided styles.
    """

    def dynamiccolumns(self, graph, axisnames):
        """create and return dynamic columns data

        Returns dynamic data matching the given axes (the axes range and other
        data might be used). The return value is a dictionary similar to the
        columns instance variable. However, the static and dynamic data does
        not need to be correlated in any way, i.e. the number of data points in
        self.columns might differ from the number of data points represented by
        the return value of the dynamiccolumns method.
        """
        return {}
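

# Editorial sketch (not part of the original source): a minimal static data
# instance conforming to the interface above would carry something like
#     self.columns = {"x": [1, 2, 3], "y": [1, 4, 9]}
#     self.columnnames = ["x", "y"]
#     self.title = "some title"
#     self.defaultstyles = [style.symbol()]
# while dynamic columns are only created on demand by dynamiccolumns().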


defaultsymbols = [style.symbol()]
defaultlines = [style.line()]


class values(_data):

    defaultstyles = defaultsymbols

    def __init__(self, title="user provided values", **columns):
        for i, values in enumerate(list(columns.values())):
            if i and len(values) != l:
                raise ValueError("different number of values")
            else:
                l = len(values)
        self.columns = columns
        self.columnnames = list(columns.keys())
        self.title = title
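

# Editorial usage sketch (not part of the original source): columns are passed
# as keyword arguments, e.g.
#     d = values(x=[1, 2, 3], y=[1, 4, 9])
# and the resulting instance can be handed to a graph's plot() method.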


class points(_data):
    "Graph data from a list of points"

    defaultstyles = defaultsymbols

    def __init__(self, points, title="user provided points", addlinenumbers=1, **columns):
        if len(points):
            l = len(points[0])
            self.columndata = [[x] for x in points[0]]
            for point in points[1:]:
                if l != len(point):
                    raise ValueError("different number of columns per point")
                for i, x in enumerate(point):
                    self.columndata[i].append(x)
            for v in list(columns.values()):
                if abs(v) > l or (not addlinenumbers and abs(v) == l):
                    raise ValueError("column number bigger than number of columns")
            if addlinenumbers:
                self.columndata = [list(range(1, len(points) + 1))] + self.columndata
            self.columns = dict([(key, self.columndata[i]) for key, i in list(columns.items())])
        else:
            self.columns = dict([(key, []) for key, i in list(columns.items())])
        self.columnnames = list(self.columns.keys())
        self.title = title
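

# Editorial usage sketch (not part of the original source): points takes a list
# of rows plus keyword arguments mapping column names to column numbers (with
# addlinenumbers enabled, column 0 holds the automatically added line numbers
# and the data columns start at 1), e.g.
#     d = points([(1, 1), (2, 4), (3, 9)], x=1, y=2)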


class _notitle:
    pass

_columnintref = re.compile(r"\$(-?\d+)", re.IGNORECASE)


class data(_data):
    "creates a new data set out of an existing data set"

    def __init__(self, data, title=_notitle, context={}, copy=1,
                 replacedollar=1, columncallback="__column__", **columns):
        # build a nice title
        if title is _notitle:
            items = list(columns.items())
            items.sort() # we want sorted items (otherwise they would be unpredictably scrambled)
            self.title = "%s: %s" % (text.escapestring(data.title or "unknown source"),
                                     ", ".join(["%s=%s" % (text.escapestring(key),
                                                           text.escapestring(str(value)))
                                                for key, value in items]))
        else:
            self.title = title

        self.orgdata = data
        self.defaultstyles = self.orgdata.defaultstyles

        # analyse the **columns argument
        self.columns = {}
        for columnname, value in list(columns.items()):
            # search in the columns dictionary
            try:
                self.columns[columnname] = self.orgdata.columns[value]
            except KeyError:
                # search in the columndata list
                try:
                    self.columns[columnname] = self.orgdata.columndata[value]
                except (AttributeError, TypeError):
                    # value was not a valid column identifier,
                    # i.e. take it as a mathematical expression
                    if replacedollar:
                        m = _columnintref.search(value)
                        while m:
                            value = "%s%s(%s)%s" % (value[:m.start()], columncallback, m.groups()[0], value[m.end():])
                            m = _columnintref.search(value)
                        value = value.replace("$", columncallback)
                    expression = compile(value.strip(), __file__, "eval")
                    context = context.copy()
                    context[columncallback] = self.columncallback
                    if self.orgdata.columns:
                        key, columndata = list(self.orgdata.columns.items())[0]
                        count = len(columndata)
                    elif self.orgdata.columndata:
                        count = len(self.orgdata.columndata[0])
                    else:
                        count = 0
                    newdata = []
                    for i in range(count):
                        self.columncallbackcount = i
                        for key, values in list(self.orgdata.columns.items()):
                            context[key] = values[i]
                        try:
                            newdata.append(eval(expression, _mathglobals, context))
                        except (ArithmeticError, ValueError):
                            newdata.append(None)
                    self.columns[columnname] = newdata

        if copy:
            # copy other, non-conflicting column names
            for columnname, columndata in list(self.orgdata.columns.items()):
                if columnname not in self.columns:
                    self.columns[columnname] = columndata

        self.columnnames = list(self.columns.keys())

    def columncallback(self, value):
        try:
            return self.orgdata.columndata[value][self.columncallbackcount]
        except:
            return self.orgdata.columns[value][self.columncallbackcount]
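

# Editorial usage sketch (not part of the original source): new columns can be
# taken over by name or number from the original data or computed from a
# mathematical expression, where $<n> refers to column number <n>, e.g.
#     d2 = data(d, x=1, ysqrt="sqrt($2)")
# Expressions are evaluated with the helper functions defined in _mathglobals.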


filecache = {}


class file(data):

    defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
    defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
    defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")

    def splitline(self, line, stringpattern, columnpattern, tofloat=1):
        """returns a list created out of the string line
        - matches stringpattern and columnpattern, adds the first group of that
          match to the result and removes the matched part until the line is empty
        - when stringpattern matched, the result is always kept as a string
        - when columnpattern matched and tofloat is true, a conversion to a float
          is tried; when this conversion fails, the string is kept"""
        result = []
        # try to gain speed by skipping the regular expression matching where possible
        if line.find('"')!=-1 or \
           stringpattern is not self.defaultstringpattern or \
           columnpattern is not self.defaultcolumnpattern:
            while len(line):
                match = stringpattern.match(line)
                if match:
                    result.append(match.groups()[0])
                    line = line[match.end():]
                else:
                    match = columnpattern.match(line)
                    if tofloat:
                        try:
                            result.append(float(match.groups()[0]))
                        except (TypeError, ValueError):
                            result.append(match.groups()[0])
                    else:
                        result.append(match.groups()[0])
                    line = line[match.end():]
        else:
            if tofloat:
                try:
                    return list(map(float, line.split()))
                except (TypeError, ValueError):
                    result = []
                    for r in line.split():
                        try:
                            result.append(float(r))
                        except (TypeError, ValueError):
                            result.append(r)
            else:
                return line.split()
        return result

    def getcachekey(self, *args):
        return ":".join([str(x) for x in args])

    def __init__(self, filename,
                 commentpattern=defaultcommentpattern,
                 stringpattern=defaultstringpattern,
                 columnpattern=defaultcolumnpattern,
                 skiphead=0, skiptail=0, every=1,
                 **kwargs):

        def readfile(file, title, self=self, commentpattern=commentpattern, stringpattern=stringpattern, columnpattern=columnpattern, skiphead=skiphead, skiptail=skiptail, every=every):
            columns = []
            columndata = []
            linenumber = 0
            maxcolumns = 0
            for line in file.readlines():
                line = line.strip()
                match = commentpattern.match(line)
                if match:
                    if not len(columndata):
                        columns = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
                else:
                    linedata = []
                    for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
                        linedata.append(value)
                    if len(linedata):
                        if linenumber >= skiphead and not ((linenumber - skiphead) % every):
                            linedata = [linenumber + 1] + linedata
                            if len(linedata) > maxcolumns:
                                maxcolumns = len(linedata)
                            columndata.append(linedata)
                        linenumber += 1
            if skiptail >= every:
                skip, x = divmod(skiptail, every)
                del columndata[-skip:]
            for i in range(len(columndata)):
                if len(columndata[i]) != maxcolumns:
                    columndata[i].extend([None]*(maxcolumns-len(columndata[i])))
            return points(columndata, title=title, addlinenumbers=0,
                          **dict([(column, i+1) for i, column in enumerate(columns[:maxcolumns-1])]))

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
            if cachekey not in filecache:
                filecache[cachekey] = readfile(open(filename), filename)
            data.__init__(self, filecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
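

# Editorial usage sketch (not part of the original source): for a whitespace
# separated data file whose first comment line optionally names the columns
# (e.g. "# x y"), columns can be selected by number or by those names:
#     d = file("input.dat", x=1, y=2)
# Results for files opened by name are cached in filecache, keyed by the
# filename and the parsing options.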


conffilecache = {}


class conffile(data):

    def __init__(self, filename, **kwargs):
        """read data from a config-like file
        - filename is a string
        - each row is defined by a section in the config-like file (see the
          config module description)
        - the columns for each row are defined by the option lines within the
          section; the option names identify and name the columns
        - further keyword arguments are passed to the constructor of data,
          keyword arguments data and title excluded"""

        def readfile(file, title):
            config = configparser.ConfigParser(strict=False)
            config.optionxform = str
            config.read_file(file)
            sections = config.sections()
            sections.sort()
            columndata = [None]*len(sections)
            maxcolumns = 1
            columns = {}
            for i in range(len(sections)):
                point = [sections[i]] + [None]*(maxcolumns-1)
                for option in config.options(sections[i]):
                    value = config.get(sections[i], option)
                    try:
                        value = float(value)
                    except:
                        pass
                    try:
                        index = columns[option]
                    except KeyError:
                        columns[option] = maxcolumns
                        point.append(value)
                        maxcolumns += 1
                    else:
                        point[index] = value
                columndata[i] = point
            # wrap result into a data instance to remove column numbers
            result = data(points(columndata, addlinenumbers=0, **columns), title=title)
            # ... but reinsert sections as linenumbers
            result.columndata = [[x[0] for x in columndata]]
            return result

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            if filename not in filecache:
                filecache[filename] = readfile(open(filename), filename)
            data.__init__(self, filecache[filename], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
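

# Editorial usage sketch (not part of the original source): a config-like file
# such as
#     [point1]
#     x = 1
#     y = 1
#     [point2]
#     x = 2
#     y = 4
# yields one row per section; the option names become the column names, so
#     d = conffile("input.cfg")
# provides columns "x" and "y", while the section names identify the rows.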


cbdfilecache = {}


class cbdfile(data):

    defaultstyles = defaultlines

    def getcachekey(self, *args):
        return ":".join([str(x) for x in args])

    def __init__(self, filename, minrank=None, maxrank=None, **kwargs):

        class cbdhead:

            def __init__(self, file):
                (self.magic,
                 self.dictaddr,
                 self.segcount,
                 self.segsize,
                 self.segmax,
                 self.fill) = struct.unpack("<5i20s", file.read(40))
                if self.magic != 0x20770002:
                    raise ValueError("bad magic number")

        class segdict:

            def __init__(self, file, i):
                self.index = i
                (self.segid,
                 self.maxlat,
                 self.minlat,
                 self.maxlong,
                 self.minlong,
                 self.absaddr,
                 self.nbytes,
                 self.rank) = struct.unpack("<6i2h", file.read(28))

        class segment:

            def __init__(self, file, sd):
                file.seek(sd.absaddr)
                (self.orgx,
                 self.orgy,
                 self.id,
                 self.nstrokes,
                 self.dummy) = struct.unpack("<3i2h", file.read(16))
                oln, olt = self.orgx, self.orgy
                self.points = [(olt, oln)]
                for i in range(self.nstrokes):
                    c1, c2 = struct.unpack("2c", file.read(2))
                    if ord(c2) & 0x40:
                        if c1 > "\177":
                            dy = ord(c1) - 256
                        else:
                            dy = ord(c1)
                        if c2 > "\177":
                            dx = ord(c2) - 256
                        else:
                            dx = ord(c2) - 64
                    else:
                        c3, c4, c5, c6, c7, c8 = struct.unpack("6c", file.read(6))
                        if c2 > "\177":
                            c2 = chr(ord(c2) | 0x40)
                        dx, dy = struct.unpack("<2i", c3+c4+c1+c2+c7+c8+c5+c6)
                    oln += dx
                    olt += dy
                    self.points.append((olt, oln))
                sd.nstrokes = self.nstrokes

        def readfile(file, title):
            h = cbdhead(file)
            file.seek(h.dictaddr)
            sds = [segdict(file, i+1) for i in range(h.segcount)]
            sbs = [segment(file, sd) for sd in sds]

            # remove jumps at long +/- 180
            for sd, sb in zip(sds, sbs):
                if sd.minlong < -150*3600 and sd.maxlong > 150*3600:
                    for i, (lat, int) in enumerate(sb.points):
                        if int < 0:
                            sb.points[i] = lat, int + 360*3600

            columndata = []
            for sd, sb in zip(sds, sbs):
                if ((minrank is None or sd.rank >= minrank) and
                    (maxrank is None or sd.rank <= maxrank)):
                    if columndata:
                        columndata.append((None, None))
                    columndata.extend([(int/3600.0, lat/3600.0)
                                       for lat, int in sb.points])

            result = points(columndata, title=title)
            result.defaultstyles = self.defaultstyles
            return result

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, minrank, maxrank)
            if cachekey not in cbdfilecache:
                cbdfilecache[cachekey] = readfile(open(filename, "rb"), filename)
            data.__init__(self, cbdfilecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
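

# Editorial note (not part of the original source): cbdfile decodes a binary
# map file (header, segment dictionary and stroke-encoded segments as unpacked
# above), converts the stored coordinates from seconds of arc to degrees and
# separates segments by (None, None) points; segments can be filtered by rank:
#     d = cbdfile("coastlines.cbd", maxrank=1)   # hypothetical file name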


class function(_data):

    defaultstyles = defaultlines

    assignmentpattern = re.compile(r"\s*([a-z_][a-z0-9_]*)\s*\(\s*([a-z_][a-z0-9_]*)\s*\)\s*=", re.IGNORECASE)

    def __init__(self, expression, title=_notitle, min=None, max=None,
                 points=100, context={}):

        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        self.min = min
        self.max = max
        self.numberofpoints = points
        self.context = context.copy() # be safe on late evaluations
        m = self.assignmentpattern.match(expression)
        if m:
            self.yname, self.xname = m.groups()
            expression = expression[m.end():]
        else:
            raise ValueError("y(x)=... or similar expected")
        if self.xname in context:
            raise ValueError("xname in context")
        self.expression = compile(expression.strip(), __file__, "eval")
        self.columns = {}
        self.columnnames = [self.xname, self.yname]

    def dynamiccolumns(self, graph, axisnames):
        dynamiccolumns = {self.xname: [], self.yname: []}

        xaxis = graph.axes[axisnames.get(self.xname, self.xname)]
        from pyx.graph.axis import logarithmic
        logaxis = isinstance(xaxis.axis, logarithmic)
        if self.min is not None:
            min = self.min
        else:
            min = xaxis.data.min
        if self.max is not None:
            max = self.max
        else:
            max = xaxis.data.max
        if logaxis:
            min = math.log(min)
            max = math.log(max)
        for i in range(self.numberofpoints):
            x = min + (max-min)*i / (self.numberofpoints-1.0)
            if logaxis:
                x = math.exp(x)
            dynamiccolumns[self.xname].append(x)
            self.context[self.xname] = x
            try:
                y = eval(self.expression, _mathglobals, self.context)
            except (ArithmeticError, ValueError):
                y = None
            dynamiccolumns[self.yname].append(y)
        return dynamiccolumns
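

# Editorial usage sketch (not part of the original source): the expression
# names both the dependent and the independent variable; min and max default
# to the range of the corresponding graph axis, e.g.
#     d = function("y(x)=sin(x)/x", min=1, max=10)
# On a logarithmic x axis the sample points are spaced logarithmically.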


class functionxy(function):

    def __init__(self, f, min=None, max=None, **kwargs):
        function.__init__(self, "y(x)=f(x)", context={"f": f}, min=min, max=max, **kwargs)


class paramfunction(_data):

    defaultstyles = defaultlines

    def __init__(self, varname, min, max, expression, title=_notitle, points=100, context={}):
        if varname in context:
            raise ValueError("varname in context")
        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        varlist, expression = expression.split("=")
        expression = compile(expression.strip(), __file__, "eval")
        keys = [key.strip() for key in varlist.split(",")]
        self.columns = dict([(key, []) for key in keys])
        context = context.copy()
        for i in range(points):
            param = min + (max-min)*i / (points-1.0)
            context[varname] = param
            values = eval(expression, _mathglobals, context)
            for key, value in zip(keys, values):
                self.columns[key].append(value)
            if len(keys) != len(values):
                raise ValueError("unpack tuple of wrong size")
        self.columnnames = list(self.columns.keys())
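

# Editorial usage sketch (not part of the original source): the expression
# assigns a tuple of column values for each value of the parameter, e.g.
#     d = paramfunction("k", 0, 2*math.pi, "x, y = cos(k), sin(k)", points=200)
# traces the unit circle in the columns x and y.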


class paramfunctionxy(paramfunction):

    def __init__(self, f, min, max, **kwargs):
        paramfunction.__init__(self, "t", min, max, "x, y = f(t)", context={"f": f}, **kwargs)


class _nodefaultstyles:
    pass


class join(_data):
    """creates a new data set by joining a list of data; it does, however, *not*
    combine points, but pads the data with None where necessary"""

    def merge_lists(self, lists):
        "merges list items without duplications, the resulting order is arbitrary"
        result = set()
        for l in lists:
            result.update(set(l))
        return builtinlist(result)

    def merge_dicts(self, dicts):
        """merge dicts containing lists as values (with an equal number of items
        per list in each dict), missing data is padded by None"""
        keys = self.merge_lists([list(d.keys()) for d in dicts])
        empties = []
        for d in dicts:
            if len(list(d.keys())) == len(keys):
                empties.append(None) # won't be needed later on
            else:
                values = list(d.values())
                if len(values):
                    empties.append([None]*len(values[0]))
                else:
                    # has no data at all -> do not add anything
                    empties.append([])
        result = {}
        for key in keys:
            result[key] = []
            for d, e in zip(dicts, empties):
                result[key].extend(d.get(key, e))
        return result

    def __init__(self, data, title=_notitle, defaultstyles=_nodefaultstyles):
        """takes a list of data, a title (if it should not be autoconstructed),
        and a defaultstyles list if there is no common defaultstyles setting
        in the provided data"""
        assert len(data)
        self.data = data
        self.columnnames = self.merge_lists([d.columnnames for d in data])
        self.columns = self.merge_dicts([d.columns for d in data])
        if title is _notitle:
            self.title = " + ".join([d.title for d in data])
        else:
            self.title = title
        if defaultstyles is _nodefaultstyles:
            self.defaultstyles = data[0].defaultstyles
            for d in data[1:]:
                if d.defaultstyles is not self.defaultstyles:
                    self.defaultstyles = None
                    break
        else:
            self.defaultstyles = defaultstyles

    def dynamiccolumns(self, graph, axisnames):
        return self.merge_dicts([d.dynamiccolumns(graph, axisnames) for d in self.data])
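

# Editorial usage sketch (not part of the original source): join concatenates
# the rows of several data instances, padding missing columns with None, e.g.
#     d = join([file("a.dat", x=1, y=2), file("b.dat", x=1, y=2)])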