default style instantiation bug in list and (the not yet announced) cbdfile (reported...
[PyX/mjg.git] / pyx / graph / data.py
blobc7d2ab70f21cac2f08fd201ae4bed2f07466fe8f
1 #!/usr/bin/env python
2 # -*- coding: ISO-8859-1 -*-
5 # Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
6 # Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
7 # Copyright (C) 2002-2005 André Wobst <wobsta@users.sourceforge.net>
9 # This file is part of PyX (http://pyx.sourceforge.net/).
11 # PyX is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # PyX is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with PyX; if not, write to the Free Software
23 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 from __future__ import nested_scopes
27 import math, re, ConfigParser, struct, warnings
28 from pyx import text
29 from pyx.style import linestyle
30 from pyx.graph import style
# Probe for the builtin; interpreters that lack it (Python 2.2 and below)
# get a minimal list-based substitute.
try:
    enumerate
except NameError:
    def enumerate(list):
        "return a list of (index, item) pairs for the sequence list"
        indices = xrange(len(list))
        return zip(indices, list)
# Probe for the builtin; Python 2.1 has no dict constructor, so provide one
# that accepts a sequence of (key, value) pairs.
try:
    dict
except NameError:
    def dict(items):
        "build a dictionary out of a sequence of (key, value) pairs"
        mapping = {}
        for name, entry in items:
            mapping[name] = entry
        return mapping
def splitatvalue(value, *splitpoints):
    """classify value by its position relative to the split points

    Counts the leading split points that are strictly smaller than value
    (counting stops at the first one that is not). With zero or one split
    point that count is the section. With several split points an odd
    count yields the section None, an even count is halved.

    Returns the tuple (section, value).
    """
    passed = 0
    for point in splitpoints:
        if not point < value:
            break
        passed += 1
    if len(splitpoints) > 1:
        if passed % 2:
            passed = None
        else:
            passed = passed >> 1
    return (passed, value)
# Names made available as globals to the expressions evaluated in this module
# (they are passed as the globals argument of eval).
_mathglobals = {"neg": lambda x: -x,
                "abs": lambda x: x < 0 and -x or x,
                "sgn": lambda x: x < 0 and -1 or 1,  # note: yields 1 for zero
                "sqrt": math.sqrt,
                "exp": math.exp,
                "log": math.log,
                # trigonometric functions working in radians
                "sin": math.sin,
                "cos": math.cos,
                "tan": math.tan,
                "asin": math.asin,
                "acos": math.acos,
                "atan": math.atan,
                # trigonometric functions working in degrees
                "sind": lambda x: math.sin(math.pi/180*x),
                "cosd": lambda x: math.cos(math.pi/180*x),
                "tand": lambda x: math.tan(math.pi/180*x),
                "asind": lambda x: 180/math.pi*math.asin(x),
                "acosd": lambda x: 180/math.pi*math.acos(x),
                "atand": lambda x: 180/math.pi*math.atan(x),
                "norm": lambda x, y: math.hypot(x, y),
                "splitatvalue": splitatvalue,
                # constants
                "pi": math.pi,
                "e": math.e}
class _data:
    """graph data interface

    Graph data consists of columns, where each column might be identified by
    a string or an integer. Each row in the resulting table refers to a data
    point.

    All methods except for the constructor should consider self and its
    attributes to be readonly, since the data instance might be shared between
    several graphs simultaneously.

    The instance variable columns is a dictionary mapping column names to the
    data of the column (i.e. to a list). Only static columns (known at
    construction time) are contained in that dictionary. For data with numbered
    columns the column data is also available via the list columndata.
    Otherwise the columndata list should be missing and an access to a column
    number will fail.

    The names of all columns (static and dynamic) must be fixed at the
    constructor and stated in the columnnames list.

    The instance variables title and defaultstyles contain the data title and
    the default styles (a list of styles), respectively.
    """

    def dynamiccolumns(self, graph):
        """create and return dynamic columns data

        Returns dynamic data matching the given axes (the axes range and other
        data might be used). The return value is a dictionary similar to the
        columns instance variable. This base implementation has no dynamic
        columns and returns an empty dictionary.
        """
        return {}
class list(_data):
    "Graph data from a list of points"

    defaultstyles = [style.symbol()]

    def __init__(self, points, title="user provided list", addlinenumbers=1, **columns):
        """transpose a list of points into column data

        - points is a sequence of rows; all rows must have the same length,
          otherwise a ValueError is raised
        - title is stored as the data title
        - when addlinenumbers is true, a column containing 1..len(points)
          is inserted as column number 0
        - the keyword arguments map column names to column numbers; a
          ValueError is raised when a number exceeds the available columns
        - for an empty points sequence each named column becomes an empty
          list and no columndata attribute is created
        """
        if len(points):
            l = len(points[0])
            # transpose: one list per column, seeded by the first row
            self.columndata = [[x] for x in points[0]]
            for point in points[1:]:
                if l != len(point):
                    raise ValueError("different number of columns per point")
                for i, x in enumerate(point):
                    self.columndata[i].append(x)
            for v in columns.values():
                # with line numbers there is one more column (index l is valid)
                if abs(v) > l or (not addlinenumbers and abs(v) == l):
                    raise ValueError("column number bigger than number of columns")
            if addlinenumbers:
                self.columndata = [range(1, len(points) + 1)] + self.columndata
            self.columns = dict([(key, self.columndata[i]) for key, i in columns.items()])
        else:
            # iterate the column names only; iterating the dictionary itself
            # and unpacking each key into two names fails for nearly all names
            self.columns = dict([(key, []) for key in columns.keys()])
        self.columnnames = self.columns.keys()
        self.title = title
147 class _notitle:
148 pass
# matches a dollar sign followed by an optionally negative integer
# (like $1 or $-2); the integer is available as the first group
_columnintref = re.compile(r"\$(-?\d+)", re.IGNORECASE)
class data(_data):
    "creates a new data set out of an existing data set"

    def __init__(self, data, title=_notitle, context={}, copy=1,
                 replacedollar=1, columncallback="__column__", **columns):
        """select, rename and calculate columns of an existing data set

        - data is the source data instance; its title, defaultstyles and
          columns attributes are read, columndata is used when present
        - title is the title of the new data; by default it is built out
          of the source title and the sorted keyword arguments
        - context is a dictionary of additional names available to
          mathematical expressions (it is copied, never modified)
        - when copy is true, columns of the source data which are not
          redefined by a keyword argument are taken over
        - when replacedollar is true, "$<integer>" in an expression is
          rewritten into a call of the column callback with that integer
          and every remaining "$" is replaced by the callback name
        - columncallback is the name under which the method columncallback
          is available within expressions
        - each keyword argument defines a column of the new data; its
          value is looked up as a column name of the source data first,
          then as a column number and is finally taken as a mathematical
          expression, which is evaluated for each row (an ArithmeticError
          or ValueError during evaluation results in None for that row)
        """
        # build a nice title
        if title is _notitle:
            items = columns.items()
            items.sort() # we want sorted items (otherwise they would be unpredictable scrambled)
            self.title = "%s: %s" % (text.escapestring(data.title or "unkown source"),
                                     ", ".join(["%s=%s" % (text.escapestring(key),
                                                           text.escapestring(str(value)))
                                                for key, value in items]))
        else:
            self.title = title

        self.orgdata = data
        self.defaultstyles = self.orgdata.defaultstyles

        # analyse the **columns argument
        self.columns = {}
        for columnname, value in columns.items():
            # search in the columns dictionary
            try:
                self.columns[columnname] = self.orgdata.columns[value]
            except KeyError:
                # search in the columndata list
                try:
                    self.columns[columnname] = self.orgdata.columndata[value]
                except (AttributeError, TypeError):
                    # value was not an valid column identifier
                    # i.e. take it as a mathematical expression
                    if replacedollar:
                        m = _columnintref.search(value)
                        while m:
                            value = "%s%s(%s)%s" % (value[:m.start()], columncallback, m.groups()[0], value[m.end():])
                            m = _columnintref.search(value)
                        value = value.replace("$", columncallback)
                    expression = compile(value.strip(), __file__, "eval")
                    context = context.copy()
                    context[columncallback] = self.columncallback
                    # the number of rows is taken from any available column
                    if self.orgdata.columns:
                        key, columndata = self.orgdata.columns.items()[0]
                        count = len(columndata)
                    elif getattr(self.orgdata, "columndata", None):
                        # getattr: sources without numbered columns have no
                        # columndata attribute at all
                        count = len(self.orgdata.columndata[0])
                    else:
                        count = 0
                    newdata = []
                    for i in xrange(count):
                        # row index used by columncallback during evaluation
                        self.columncallbackcount = i
                        for key, values in self.orgdata.columns.items():
                            context[key] = values[i]
                        try:
                            newdata.append(eval(expression, _mathglobals, context))
                        except (ArithmeticError, ValueError):
                            newdata.append(None)
                    self.columns[columnname] = newdata

        if copy:
            # copy other, non-conflicting column names
            for columnname, columndata in self.orgdata.columns.items():
                if not self.columns.has_key(columnname):
                    self.columns[columnname] = columndata

        self.columnnames = self.columns.keys()

    def columncallback(self, value):
        """return the entry of column value in the row currently evaluated

        value is tried as a column number of the source data first and as
        a column name afterwards.
        """
        try:
            return self.orgdata.columndata[value][self.columncallbackcount]
        except (AttributeError, TypeError, IndexError, KeyError):
            # only lookup failures trigger the fallback; a bare except would
            # also swallow KeyboardInterrupt and SystemExit
            return self.orgdata.columns[value][self.columncallbackcount]
# module-level cache of already parsed data files (cache key -> data instance),
# so that a file is read and parsed only once
filecache = {}
class file(data):
    "graph data read from a text file organized in columns"

    defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
    defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
    defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")

    def splitline(self, line, stringpattern, columnpattern, tofloat=1):
        """returns a list created out of the string line
        - matches stringpattern and columnpattern, adds the first group of that
          match to the result and removes those matches until the line is empty
        - when stringpattern matched, the result is always kept as a string
        - when columnpattern matched and tofloat is true, a conversion to a float
          is tried; when this conversion fails, the string is kept
        - when the line contains no double quote and both patterns are the
          class defaults, the line is just split at whitespace instead"""
        result = []
        # try to gain speed by skip matching regular expressions
        if line.find('"')!=-1 or \
           stringpattern is not self.defaultstringpattern or \
           columnpattern is not self.defaultcolumnpattern:
            while len(line):
                match = stringpattern.match(line)
                if match:
                    result.append(match.groups()[0])
                    line = line[match.end():]
                else:
                    match = columnpattern.match(line)
                    if tofloat:
                        try:
                            result.append(float(match.groups()[0]))
                        except (TypeError, ValueError):
                            result.append(match.groups()[0])
                    else:
                        result.append(match.groups()[0])
                    line = line[match.end():]
        else:
            if tofloat:
                try:
                    # fast path: every entry is a number
                    return map(float, line.split())
                except (TypeError, ValueError):
                    # at least one entry is not a number: convert one by one
                    result = []
                    for r in line.split():
                        try:
                            result.append(float(r))
                        except (TypeError, ValueError):
                            result.append(r)
            else:
                return line.split()
        return result

    def getcachekey(self, *args):
        "returns a string build out of the arguments to identify a cache entry"
        return ":".join([str(x) for x in args])

    def __init__(self, filename,
                 commentpattern=defaultcommentpattern,
                 stringpattern=defaultstringpattern,
                 columnpattern=defaultcolumnpattern,
                 skiphead=0, skiptail=0, every=1,
                 **kwargs):
        """read data from a text file
        - filename is either a file name or an object providing a readlines
          method; named files are parsed once and kept in filecache, the
          result of reading a file-like object is not cached
        - lines starting with commentpattern are comments; the last comment
          line in front of the first data line provides the column names
        - stringpattern and columnpattern are used to split a line into its
          entries (see splitline)
        - the first skiphead data lines are ignored, only every every-th of
          the following data lines is used and skiptail/every (integer
          division) of the lines kept that way are removed at the end
        - column number 0 contains the number of the data line (starting
          at 1, counting non-empty data lines only)
        - further keyword arguments are passed to the constructor of data"""

        def readfile(file, title, self=self, commentpattern=commentpattern, stringpattern=stringpattern, columnpattern=columnpattern, skiphead=skiphead, skiptail=skiptail, every=every):
            columns = []
            columndata = []
            linenumber = 0
            maxcolumns = 0
            for line in file.readlines():
                line = line.strip()
                match = commentpattern.match(line)
                if match:
                    # comments are column titles as long as no data was read
                    if not len(columndata):
                        columns = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
                else:
                    linedata = []
                    for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
                        linedata.append(value)
                    if len(linedata):
                        if linenumber >= skiphead and not ((linenumber - skiphead) % every):
                            linedata = [linenumber + 1] + linedata
                            if len(linedata) > maxcolumns:
                                maxcolumns = len(linedata)
                            columndata.append(linedata)
                        linenumber += 1
            if skiptail >= every:
                skip, x = divmod(skiptail, every)
                del columndata[-skip:]
            # fill short lines with None to get a rectangular table
            for i in xrange(len(columndata)):
                if len(columndata[i]) != maxcolumns:
                    columndata[i].extend([None]*(maxcolumns-len(columndata[i])))
            return list(columndata, title=title, addlinenumbers=0,
                        **dict([(column, i+1) for i, column in enumerate(columns[:maxcolumns-1])]))

        try:
            filename.readlines
        except AttributeError:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
            if not filecache.has_key(cachekey):
                handle = open(filename)
                try:
                    filecache[cachekey] = readfile(handle, filename)
                finally:
                    # the file is read completely by readfile; do not leak
                    # the handle, not even when parsing fails
                    handle.close()
            data.__init__(self, filecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
# cache of already parsed config-like files (file name -> data instance)
conffilecache = {}

class conffile(data):
    "graph data read from a config-like file"

    def __init__(self, filename, **kwargs):
        """read data from a config-like file
        - filename is a string or an object providing a readlines
          attribute; named files are parsed once and kept in conffilecache
        - each row is defined by a section in the config-like file (see
          config module description)
        - the columns for each row are defined by lines in the section file;
          the option entries identify and name the columns
        - options missing in a section result in None for that row
        - further keyword arguments are passed to the constructor of data,
          keyword arguments data and titles excluded"""

        def readfile(file, title):
            config = ConfigParser.ConfigParser()
            config.optionxform = str
            config.readfp(file)
            sections = config.sections()
            sections.sort()
            columndata = [None]*len(sections)
            maxcolumns = 1
            columns = {}
            for i in xrange(len(sections)):
                # column 0 is the section name, all options known so far follow
                point = [sections[i]] + [None]*(maxcolumns-1)
                for option in config.options(sections[i]):
                    value = config.get(sections[i], option)
                    try:
                        value = float(value)
                    except (TypeError, ValueError):
                        # not a number: keep the string
                        pass
                    try:
                        index = columns[option]
                    except KeyError:
                        # first occurrence of this option: append a new column
                        columns[option] = maxcolumns
                        point.append(value)
                        maxcolumns += 1
                    else:
                        point[index] = value
                columndata[i] = point
            # rows created before an option was seen for the first time are
            # too short; fill them up, since list insists on equal row lengths
            for point in columndata:
                if len(point) != maxcolumns:
                    point.extend([None]*(maxcolumns-len(point)))
            # wrap result into a data instance to remove column numbers
            result = data(list(columndata, addlinenumbers=0, **columns), title=title)
            # ... but reinsert sections as linenumbers
            result.columndata = [[x[0] for x in columndata]]
            return result

        try:
            filename.readlines
        except AttributeError:
            # not a file-like object -> open it
            # (use the cache of this class; the keys of filecache are build
            # differently and belong to the file class)
            if not conffilecache.has_key(filename):
                handle = open(filename)
                try:
                    conffilecache[filename] = readfile(handle, filename)
                finally:
                    # the parser has consumed the file; do not leak the handle
                    handle.close()
            data.__init__(self, conffilecache[filename], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
# cache of already parsed cbd files (cache key -> data instance)
cbdfilecache = {}
class cbdfile(data):
    "graph data read from a binary cbd file"

    defaultstyles = [style.line()]

    def getcachekey(self, *args):
        "returns a string build out of the arguments to identify a cache entry"
        return ":".join([str(x) for x in args])

    def __init__(self, filename, minrank=None, maxrank=None, **kwargs):
        """read data from a cbd file
        - filename is either a file name or an object providing a readlines
          attribute (such an object needs to support read and seek as
          well); named files are parsed once and kept in cbdfilecache
        - only segments with a rank between minrank and maxrank (both
          inclusive, None disables the limit) are used
        - the points of all selected segments are joined into a single
          point list, where segments are separated by a (None, None) entry
        - coordinates are divided by 3600 (presumably a conversion from
          arc seconds to degrees -- to be confirmed against the format
          description)
        - further keyword arguments are passed to the constructor of data"""

        class cbdhead:
            "file header: 40 bytes, little endian"

            def __init__(self, file):
                (self.magic,
                 self.dictaddr,
                 self.segcount,
                 self.segsize,
                 self.segmax,
                 self.fill) = struct.unpack("<5i20s", file.read(40))
                if self.magic != 0x20770002:
                    raise ValueError("bad magic number")

        class segdict:
            "segment dictionary entry: 28 bytes, little endian"

            def __init__(self, file, i):
                self.index = i
                (self.segid,
                 self.maxlat,
                 self.minlat,
                 self.maxlong,
                 self.minlong,
                 self.absaddr,
                 self.nbytes,
                 self.rank) = struct.unpack("<6i2h", file.read(28))

        class segment:
            "segment data: a start point followed by nstrokes relative moves"

            def __init__(self, file, sd):
                file.seek(sd.absaddr)
                (self.orgx,
                 self.orgy,
                 self.id,
                 self.nstrokes,
                 self.dummy) = struct.unpack("<3i2h", file.read(16))
                oln, olt = self.orgx, self.orgy
                self.points = [(olt, oln)]
                for i in range(self.nstrokes):
                    c1, c2 = struct.unpack("2c", file.read(2))
                    if ord(c2) & 0x40:
                        # short stroke: both offsets are stored in two bytes
                        if c1 > "\177":
                            dy = ord(c1) - 256
                        else:
                            dy = ord(c1)
                        if c2 > "\177":
                            dx = ord(c2) - 256
                        else:
                            dx = ord(c2) - 64
                    else:
                        # long stroke: six more bytes follow; the bytes are
                        # reordered to form two little endian 32 bit integers
                        c3, c4, c5, c6, c7, c8 = struct.unpack("6c", file.read(6))
                        if c2 > "\177":
                            c2 = chr(ord(c2) | 0x40)
                        dx, dy = struct.unpack("<2i", c3+c4+c1+c2+c7+c8+c5+c6)
                    oln += dx
                    olt += dy
                    self.points.append((olt, oln))
                sd.nstrokes = self.nstrokes

        def readfile(file, title):
            h = cbdhead(file)
            file.seek(h.dictaddr)
            sds = [segdict(file, i+1) for i in range(h.segcount)]
            sbs = [segment(file, sd) for sd in sds]

            # remove jumps at long +/- 180
            for sd, sb in zip(sds, sbs):
                if sd.minlong < -150*3600 and sd.maxlong > 150*3600:
                    for i, (lat, long) in enumerate(sb.points):
                        if long < 0:
                            sb.points[i] = lat, long + 360*3600

            columndata = []
            for sd, sb in zip(sds, sbs):
                if ((minrank is None or sd.rank >= minrank) and
                    (maxrank is None or sd.rank <= maxrank)):
                    # separate segments by an invalid point
                    if columndata:
                        columndata.append((None, None))
                    columndata.extend([(long/3600.0, lat/3600.0)
                                       for lat, long in sb.points])

            result = list(columndata, title=title)
            result.defaultstyles = self.defaultstyles
            return result


        try:
            filename.readlines
        except AttributeError:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, minrank, maxrank)
            if not cbdfilecache.has_key(cachekey):
                handle = open(filename, "rb")
                try:
                    cbdfilecache[cachekey] = readfile(handle, filename)
                finally:
                    # all segments are read by readfile; do not leak the
                    # handle, not even when the file is malformed
                    handle.close()
            data.__init__(self, cbdfilecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
class function(_data):
    "graph data created by evaluating a function y(x) along an axis"

    defaultstyles = [style.line()]

    assignmentpattern = re.compile(r"\s*([a-z_][a-z0-9_]*)\s*\(\s*([a-z_][a-z0-9_]*)\s*\)\s*=", re.IGNORECASE)

    def __init__(self, expression, title=_notitle, min=None, max=None,
                 points=100, context={}):
        """prepare the evaluation of a function
        - expression is a string like "y(x)=..."; the names in front of
          the equal sign define the names of the two columns, the rest is
          a mathematical expression in the variable given in parentheses
        - title is the data title; by default the expression is used
        - min and max limit the range of the variable; when None, the
          range of the corresponding axis is used
        - points is the number of function evaluations
        - context is a dictionary of additional names available to the
          expression; it must not contain the variable name
        - a ValueError is raised when the expression does not start with
          an assignment pattern or the variable name is part of context"""

        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        self.min = min
        self.max = max
        self.numberofpoints = points
        self.context = context.copy() # be save on late evaluations
        m = self.assignmentpattern.match(expression)
        if m:
            self.yname, self.xname = m.groups()
            expression = expression[m.end():]
        else:
            raise ValueError("y(x)=... or similar expected")
        if context.has_key(self.xname):
            raise ValueError("xname in context")
        self.expression = compile(expression.strip(), __file__, "eval")
        # there are no static columns; all data is created in dynamiccolumns
        self.columns = {}
        self.columnnames = [self.xname, self.yname]

    def dynamiccolumns(self, graph):
        """evaluate the function for the given graph

        The variable is sampled equidistantly between min and max
        (equidistant in the logarithm for a logarithmic axis). Function
        values raising an ArithmeticError or ValueError become None.
        Returns a dictionary containing both columns.
        """
        dynamiccolumns = {self.xname: [], self.yname: []}

        xaxis = graph.axes[self.xname]
        from pyx.graph.axis import logarithmic
        logaxis = isinstance(xaxis.axis, logarithmic)
        if self.min is not None:
            min = self.min
        else:
            min = xaxis.data.min
        if self.max is not None:
            max = self.max
        else:
            max = xaxis.data.max
        if logaxis:
            min = math.log(min)
            max = math.log(max)
        for i in range(self.numberofpoints):
            if self.numberofpoints > 1:
                x = min + (max-min)*i / (self.numberofpoints-1.0)
            else:
                # a single point can not span the range (and the formula
                # above would divide by zero): evaluate at the lower end
                x = min
            if logaxis:
                x = math.exp(x)
            dynamiccolumns[self.xname].append(x)
            self.context[self.xname] = x
            try:
                y = eval(self.expression, _mathglobals, self.context)
            except (ArithmeticError, ValueError):
                y = None
            dynamiccolumns[self.yname].append(y)
        return dynamiccolumns
class functionxy(function):
    "function data y(x)=f(x) for a callable f"

    def __init__(self, f, min=None, max=None, **kwargs):
        # the callable becomes available to the expression under the name f
        callables = {"f": f}
        function.__init__(self, "y(x)=f(x)", min=min, max=max,
                          context=callables, **kwargs)
class paramfunction(_data):
    "graph data created by evaluating a parametric expression"

    defaultstyles = [style.line()]

    def __init__(self, varname, min, max, expression, title=_notitle, points=100, context={}):
        """evaluate a parametric function
        - varname is the name of the parameter, which is sampled at
          points equidistant values between min and max
        - expression is a string like "x, y = ..."; the comma separated
          names in front of the first equal sign define the columns, the
          rest is a mathematical expression, which needs to evaluate to a
          sequence with one value per column
        - title is the data title; by default the expression is used
        - context is a dictionary of additional names available to the
          expression; it must not contain varname
        - a ValueError is raised when varname is part of context, the
          expression contains no equal sign or the number of values does
          not match the number of columns"""
        if context.has_key(varname):
            raise ValueError("varname in context")
        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        # split at the first equal sign only: the right hand side might
        # contain equal signs itself (comparisons, keyword arguments)
        varlist, expression = expression.split("=", 1)
        expression = compile(expression.strip(), __file__, "eval")
        keys = [key.strip() for key in varlist.split(",")]
        self.columns = dict([(key, []) for key in keys])
        context = context.copy()
        for i in range(points):
            if points > 1:
                param = min + (max-min)*i / (points-1.0)
            else:
                # a single point can not span the range (and the formula
                # above would divide by zero): evaluate at the lower end
                param = min
            context[varname] = param
            values = eval(expression, _mathglobals, context)
            # check the size before storing anything of this row
            if len(keys) != len(values):
                raise ValueError("unpack tuple of wrong size")
            for key, value in zip(keys, values):
                self.columns[key].append(value)
        self.columnnames = self.columns.keys()
class paramfunctionxy(paramfunction):
    "parametric function data x, y = f(t) for a callable f"

    def __init__(self, f, min, max, **kwargs):
        # the callable becomes available to the expression under the name f
        callables = {"f": f}
        paramfunction.__init__(self, "t", min, max, "x, y = f(t)",
                               context=callables, **kwargs)