# -*- encoding: utf-8 -*-
#
#
# Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
# Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
# Copyright (C) 2002-2006 André Wobst <wobsta@users.sourceforge.net>
#
# This file is part of PyX (http://pyx.sourceforge.net/).
#
# PyX is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# PyX is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PyX; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

import math, re, ConfigParser, struct, warnings
from pyx import text
from pyx.style import linestyle
from pyx.graph import style


def splitatvalue(value, *splitpoints):
    section = 0
    while section < len(splitpoints) and splitpoints[section] < value:
        section += 1
    if len(splitpoints) > 1:
        if section % 2:
            section = None
        else:
            section >>= 1
    return (section, value)
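
# Illustrative sketch (not part of the original module): splitatvalue maps a
# value onto a section index, where consecutive pairs of splitpoints mark
# excluded ranges (the values below were chosen for illustration only):
#     splitatvalue(0.5, 1)    -> (0, 0.5)     below the single splitpoint
#     splitatvalue(1.5, 1)    -> (1, 1.5)     above the single splitpoint
#     splitatvalue(1, 2, 8)   -> (0, 1)       left of the excluded range 2..8
#     splitatvalue(5, 2, 8)   -> (None, 5)    inside the excluded range
#     splitatvalue(9, 2, 8)   -> (1, 9)       right of the excluded range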


_mathglobals = {"neg": lambda x: -x,
                "abs": lambda x: x < 0 and -x or x,
                "sgn": lambda x: x < 0 and -1 or 1,
                "sqrt": math.sqrt,
                "exp": math.exp,
                "log": math.log,
                "sin": math.sin,
                "cos": math.cos,
                "tan": math.tan,
                "asin": math.asin,
                "acos": math.acos,
                "atan": math.atan,
                "sind": lambda x: math.sin(math.pi/180*x),
                "cosd": lambda x: math.cos(math.pi/180*x),
                "tand": lambda x: math.tan(math.pi/180*x),
                "asind": lambda x: 180/math.pi*math.asin(x),
                "acosd": lambda x: 180/math.pi*math.acos(x),
                "atand": lambda x: 180/math.pi*math.atan(x),
                "norm": lambda x, y: math.hypot(x, y),
                "splitatvalue": splitatvalue,
                "pi": math.pi,
                "e": math.e}
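
# Illustrative sketch (not part of the original module): these names form the
# globals available to the expression-based data classes below, e.g. in user
# code (assuming the usual "from pyx import graph" import; the expressions and
# the data instance d are hypothetical):
#     graph.data.function("y(x)=exp(-x/100)*sind(x)", min=0, max=720)
#     graph.data.data(d, r="norm(x, y)", phi="atand(y/x)")
# where d is assumed to provide columns named x and y.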


class _data:
    """graph data interface

    Graph data consists of columns, where each column might be identified by a
    string or an integer. Each row in the resulting table refers to a data
    point.

    All methods except for the constructor should consider self and its
    attributes to be readonly, since the data instance might be shared between
    several graphs simultaneously.

    The instance variable columns is a dictionary mapping column names to the
    data of the column (i.e. to a list). Only static columns (known at
    construction time) are contained in that dictionary. For data with numbered
    columns the column data is also available via the list columndata.
    Otherwise the columndata list should be missing and an access to a column
    number will fail.

    The names of all columns (static and dynamic) must be fixed in the
    constructor and stated in the columnnames list.

    The instance variables title and defaultstyles contain the data title and
    the default styles (a list of styles), respectively.
    """

    def dynamiccolumns(self, graph):
        """create and return dynamic columns data

        Returns dynamic data matching the given axes (the axes range and other
        data might be used). The return value is a dictionary similar to the
        columns instance variable.
        """
        return {}


defaultsymbols = [style.symbol()]
defaultlines = [style.line()]


class values(_data):

    defaultstyles = defaultsymbols

    def __init__(self, title="user provided values", **columns):
        for i, values in enumerate(columns.values()):
            if i and len(values) != l:
                raise ValueError("different number of values")
            else:
                l = len(values)
        self.columns = columns
        self.columnnames = columns.keys()
        self.title = title
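
# A minimal usage sketch (not part of the original module); column names and
# values are arbitrary:
#     d = graph.data.values(x=[1, 2, 3], y=[0.1, 0.4, 0.9])
# All value lists must have the same length, otherwise ValueError is raised.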


class points(_data):
    "Graph data from a list of points"

    defaultstyles = defaultsymbols

    def __init__(self, points, title="user provided points", addlinenumbers=1, **columns):
        if len(points):
            l = len(points[0])
            self.columndata = [[x] for x in points[0]]
            for point in points[1:]:
                if l != len(point):
                    raise ValueError("different number of columns per point")
                for i, x in enumerate(point):
                    self.columndata[i].append(x)
            for v in columns.values():
                if abs(v) > l or (not addlinenumbers and abs(v) == l):
                    raise ValueError("column number bigger than number of columns")
            if addlinenumbers:
                self.columndata = [range(1, len(points) + 1)] + self.columndata
            self.columns = dict([(key, self.columndata[i]) for key, i in columns.items()])
        else:
            self.columns = dict([(key, []) for key, i in columns.items()])
        self.columnnames = self.columns.keys()
        self.title = title
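
# A minimal usage sketch (not part of the original module); since line numbers
# are prepended by default (addlinenumbers=1), the point coordinates are
# addressed by 1-based column numbers:
#     d = graph.data.points([(1, 0.1), (2, 0.4), (3, 0.9)], x=1, y=2)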


def list(*args, **kwargs):
    warnings.warn("graph.data.list is deprecated. Use graph.data.points instead.")
    return points(*args, **kwargs)


class _notitle:
    pass


_columnintref = re.compile(r"\$(-?\d+)", re.IGNORECASE)


class data(_data):
    "creates a new data set out of an existing data set"

    def __init__(self, data, title=_notitle, context={}, copy=1,
                       replacedollar=1, columncallback="__column__", **columns):
        # build a nice title
        if title is _notitle:
            items = columns.items()
            items.sort() # we want sorted items (otherwise they would be scrambled unpredictably)
            self.title = "%s: %s" % (text.escapestring(data.title or "unknown source"),
                                     ", ".join(["%s=%s" % (text.escapestring(key),
                                                           text.escapestring(str(value)))
                                                for key, value in items]))
        else:
            self.title = title

        self.orgdata = data
        self.defaultstyles = self.orgdata.defaultstyles

        # analyse the **columns argument
        self.columns = {}
        for columnname, value in columns.items():
            # search in the columns dictionary
            try:
                self.columns[columnname] = self.orgdata.columns[value]
            except KeyError:
                # search in the columndata list
                try:
                    self.columns[columnname] = self.orgdata.columndata[value]
                except (AttributeError, TypeError):
                    # value was not a valid column identifier,
                    # i.e. take it as a mathematical expression
                    if replacedollar:
                        m = _columnintref.search(value)
                        while m:
                            value = "%s%s(%s)%s" % (value[:m.start()], columncallback, m.groups()[0], value[m.end():])
                            m = _columnintref.search(value)
                        value = value.replace("$", columncallback)
                    expression = compile(value.strip(), __file__, "eval")
                    context = context.copy()
                    context[columncallback] = self.columncallback
                    if self.orgdata.columns:
                        key, columndata = self.orgdata.columns.items()[0]
                        count = len(columndata)
                    elif self.orgdata.columndata:
                        count = len(self.orgdata.columndata[0])
                    else:
                        count = 0
                    newdata = []
                    for i in xrange(count):
                        self.columncallbackcount = i
                        for key, values in self.orgdata.columns.items():
                            context[key] = values[i]
                        try:
                            newdata.append(eval(expression, _mathglobals, context))
                        except (ArithmeticError, ValueError):
                            newdata.append(None)
                    self.columns[columnname] = newdata

        if copy:
            # copy other, non-conflicting column names
            for columnname, columndata in self.orgdata.columns.items():
                if not self.columns.has_key(columnname):
                    self.columns[columnname] = columndata

        self.columnnames = self.columns.keys()

    def columncallback(self, value):
        try:
            return self.orgdata.columndata[value][self.columncallbackcount]
        except:
            return self.orgdata.columns[value][self.columncallbackcount]
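
# A minimal usage sketch (not part of the original module); here the original
# data is a points instance, so numbered columns are available:
#     d = graph.data.points([(1, 0.2), (2, 0.8), (3, 0.5)], x=1, y=2)
#     d2 = graph.data.data(d, x=1, y="2*sin(pi*$2)", title="rescaled")
# A column keyword value may be a column name of the original data, a column
# number (resolved via columndata), or a mathematical expression in which $n
# refers to column n, named columns are available as variables, and the
# helpers from _mathglobals may be used.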


filecache = {}


class file(data):

    defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
    defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
    defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")

    def splitline(self, line, stringpattern, columnpattern, tofloat=1):
        """returns a list created out of the string line
        - matches stringpattern and columnpattern, adds the first group of that
          match to the result and removes those matches until the line is empty
        - when stringpattern matched, the result is always kept as a string
        - when columnpattern matched and tofloat is true, a conversion to a float
          is tried; when this conversion fails, the string is kept"""
        result = []
        # try to gain speed by skipping the regular expression matching where possible
        if line.find('"')!=-1 or \
           stringpattern is not self.defaultstringpattern or \
           columnpattern is not self.defaultcolumnpattern:
            while len(line):
                match = stringpattern.match(line)
                if match:
                    result.append(match.groups()[0])
                    line = line[match.end():]
                else:
                    match = columnpattern.match(line)
                    if tofloat:
                        try:
                            result.append(float(match.groups()[0]))
                        except (TypeError, ValueError):
                            result.append(match.groups()[0])
                    else:
                        result.append(match.groups()[0])
                    line = line[match.end():]
        else:
            if tofloat:
                try:
                    return map(float, line.split())
                except (TypeError, ValueError):
                    result = []
                    for r in line.split():
                        try:
                            result.append(float(r))
                        except (TypeError, ValueError):
                            result.append(r)
            else:
                return line.split()
        return result
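
    # Illustrative sketch (not part of the original module) of what splitline
    # returns for the default patterns (the input line is hypothetical):
    #     self.splitline('1.2 "hello world" 4',
    #                    self.defaultstringpattern, self.defaultcolumnpattern)
    #     -> [1.2, 'hello world', 4.0]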

    def getcachekey(self, *args):
        return ":".join([str(x) for x in args])

    def __init__(self, filename,
                       commentpattern=defaultcommentpattern,
                       stringpattern=defaultstringpattern,
                       columnpattern=defaultcolumnpattern,
                       skiphead=0, skiptail=0, every=1,
                       **kwargs):

        def readfile(file, title, self=self, commentpattern=commentpattern, stringpattern=stringpattern, columnpattern=columnpattern, skiphead=skiphead, skiptail=skiptail, every=every):
            columns = []
            columndata = []
            linenumber = 0
            maxcolumns = 0
            for line in file.readlines():
                line = line.strip()
                match = commentpattern.match(line)
                if match:
                    if not len(columndata):
                        columns = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
                else:
                    linedata = []
                    for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
                        linedata.append(value)
                    if len(linedata):
                        if linenumber >= skiphead and not ((linenumber - skiphead) % every):
                            linedata = [linenumber + 1] + linedata
                            if len(linedata) > maxcolumns:
                                maxcolumns = len(linedata)
                            columndata.append(linedata)
                        linenumber += 1
            if skiptail >= every:
                skip, x = divmod(skiptail, every)
                del columndata[-skip:]
            for i in xrange(len(columndata)):
                if len(columndata[i]) != maxcolumns:
                    columndata[i].extend([None]*(maxcolumns-len(columndata[i])))
            return points(columndata, title=title, addlinenumbers=0,
                          **dict([(column, i+1) for i, column in enumerate(columns[:maxcolumns-1])]))

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
            if not filecache.has_key(cachekey):
                filecache[cachekey] = readfile(open(filename), filename)
            data.__init__(self, filecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
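
# A minimal usage sketch (not part of the original module); the file name and
# column names are hypothetical. Columns may be addressed by number or, when
# the first comment line of the file names the columns, by those names:
#     d = graph.data.file("input.dat", x=1, y=2)
#     d = graph.data.file("input.dat", x="time", y="value")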


conffilecache = {}


class conffile(data):

    def __init__(self, filename, **kwargs):
        """read data from a config-like file
        - filename is a string
        - each row is defined by a section in the config-like file (see
          config module description)
        - the columns for each row are defined by lines in the section file;
          the option entries identify and name the columns
        - further keyword arguments are passed to the constructor of data,
          keyword arguments data and title excluded"""

        def readfile(file, title):
            config = ConfigParser.ConfigParser()
            config.optionxform = str
            config.readfp(file)
            sections = config.sections()
            sections.sort()
            columndata = [None]*len(sections)
            maxcolumns = 1
            columns = {}
            for i in xrange(len(sections)):
                point = [sections[i]] + [None]*(maxcolumns-1)
                for option in config.options(sections[i]):
                    value = config.get(sections[i], option)
                    try:
                        value = float(value)
                    except:
                        pass
                    try:
                        index = columns[option]
                    except KeyError:
                        columns[option] = maxcolumns
                        point.append(value)
                        maxcolumns += 1
                    else:
                        point[index] = value
                columndata[i] = point
            # wrap result into a data instance to remove column numbers
            result = data(points(columndata, addlinenumbers=0, **columns), title=title)
            # ... but reinsert sections as linenumbers
            result.columndata = [[x[0] for x in columndata]]
            return result

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            if not conffilecache.has_key(filename):
                conffilecache[filename] = readfile(open(filename), filename)
            data.__init__(self, conffilecache[filename], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
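
# A minimal usage sketch (not part of the original module); the file name and
# its contents are hypothetical. Each section becomes a row and each option
# becomes a named column:
#     data.cfg:
#         [point1]
#         x = 1
#         y = 2.5
#         [point2]
#         x = 2
#         y = 3.5
#     d = graph.data.conffile("data.cfg")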


cbdfilecache = {}


class cbdfile(data):

    defaultstyles = defaultlines

    def getcachekey(self, *args):
        return ":".join([str(x) for x in args])

    def __init__(self, filename, minrank=None, maxrank=None, **kwargs):

        class cbdhead:

            def __init__(self, file):
                (self.magic,
                 self.dictaddr,
                 self.segcount,
                 self.segsize,
                 self.segmax,
                 self.fill) = struct.unpack("<5i20s", file.read(40))
                if self.magic != 0x20770002:
                    raise ValueError("bad magic number")

        class segdict:

            def __init__(self, file, i):
                self.index = i
                (self.segid,
                 self.maxlat,
                 self.minlat,
                 self.maxlong,
                 self.minlong,
                 self.absaddr,
                 self.nbytes,
                 self.rank) = struct.unpack("<6i2h", file.read(28))

        class segment:

            def __init__(self, file, sd):
                file.seek(sd.absaddr)
                (self.orgx,
                 self.orgy,
                 self.id,
                 self.nstrokes,
                 self.dummy) = struct.unpack("<3i2h", file.read(16))
                oln, olt = self.orgx, self.orgy
                self.points = [(olt, oln)]
                for i in range(self.nstrokes):
                    c1, c2 = struct.unpack("2c", file.read(2))
                    if ord(c2) & 0x40:
                        if c1 > "\177":
                            dy = ord(c1) - 256
                        else:
                            dy = ord(c1)
                        if c2 > "\177":
                            dx = ord(c2) - 256
                        else:
                            dx = ord(c2) - 64
                    else:
                        c3, c4, c5, c6, c7, c8 = struct.unpack("6c", file.read(6))
                        if c2 > "\177":
                            c2 = chr(ord(c2) | 0x40)
                        dx, dy = struct.unpack("<2i", c3+c4+c1+c2+c7+c8+c5+c6)
                    oln += dx
                    olt += dy
                    self.points.append((olt, oln))
                sd.nstrokes = self.nstrokes

        def readfile(file, title):
            h = cbdhead(file)
            file.seek(h.dictaddr)
            sds = [segdict(file, i+1) for i in range(h.segcount)]
            sbs = [segment(file, sd) for sd in sds]

            # remove jumps at long +/- 180
            for sd, sb in zip(sds, sbs):
                if sd.minlong < -150*3600 and sd.maxlong > 150*3600:
                    for i, (lat, long) in enumerate(sb.points):
                        if long < 0:
                            sb.points[i] = lat, long + 360*3600

            columndata = []
            for sd, sb in zip(sds, sbs):
                if ((minrank is None or sd.rank >= minrank) and
                    (maxrank is None or sd.rank <= maxrank)):
                    if columndata:
                        columndata.append((None, None))
                    columndata.extend([(long/3600.0, lat/3600.0)
                                       for lat, long in sb.points])

            result = points(columndata, title=title)
            result.defaultstyles = self.defaultstyles
            return result

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, minrank, maxrank)
            if not cbdfilecache.has_key(cachekey):
                cbdfilecache[cachekey] = readfile(open(filename, "rb"), filename)
            data.__init__(self, cbdfilecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
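
# A minimal usage sketch (not part of the original module); the file name and
# rank values are hypothetical. The resulting numbered columns 1 and 2 hold
# longitude and latitude in degrees, with (None, None) rows separating the
# segments, so they can be mapped to named columns via the keyword arguments:
#     d = graph.data.cbdfile("coastlines.cbd", minrank=1, maxrank=4, x=1, y=2)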


class function(_data):

    defaultstyles = defaultlines

    assignmentpattern = re.compile(r"\s*([a-z_][a-z0-9_]*)\s*\(\s*([a-z_][a-z0-9_]*)\s*\)\s*=", re.IGNORECASE)

    def __init__(self, expression, title=_notitle, min=None, max=None,
                       points=100, context={}):

        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        self.min = min
        self.max = max
        self.numberofpoints = points
        self.context = context.copy() # be safe on late evaluations
        m = self.assignmentpattern.match(expression)
        if m:
            self.yname, self.xname = m.groups()
            expression = expression[m.end():]
        else:
            raise ValueError("y(x)=... or similar expected")
        if context.has_key(self.xname):
            raise ValueError("xname in context")
        self.expression = compile(expression.strip(), __file__, "eval")
        self.columns = {}
        self.columnnames = [self.xname, self.yname]

    def dynamiccolumns(self, graph):
        dynamiccolumns = {self.xname: [], self.yname: []}

        xaxis = graph.axes[self.xname]
        from pyx.graph.axis import logarithmic
        logaxis = isinstance(xaxis.axis, logarithmic)
        if self.min is not None:
            min = self.min
        else:
            min = xaxis.data.min
        if self.max is not None:
            max = self.max
        else:
            max = xaxis.data.max
        if logaxis:
            min = math.log(min)
            max = math.log(max)
        for i in range(self.numberofpoints):
            x = min + (max-min)*i / (self.numberofpoints-1.0)
            if logaxis:
                x = math.exp(x)
            dynamiccolumns[self.xname].append(x)
            self.context[self.xname] = x
            try:
                y = eval(self.expression, _mathglobals, self.context)
            except (ArithmeticError, ValueError):
                y = None
            dynamiccolumns[self.yname].append(y)
        return dynamiccolumns
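
# A minimal usage sketch (not part of the original module); the expression is
# arbitrary. When min and max are omitted, the range is taken from the graph's
# x axis at plotting time:
#     d = graph.data.function("y(x)=sin(x)/x", min=1, max=10)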


class functionxy(function):

    def __init__(self, f, min=None, max=None, **kwargs):
        function.__init__(self, "y(x)=f(x)", context={"f": f}, min=min, max=max, **kwargs)
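
# A minimal usage sketch (not part of the original module); the callable is
# arbitrary and is evaluated via the expression "y(x)=f(x)":
#     d = graph.data.functionxy(lambda x: x**3 - x, min=-1.5, max=1.5)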


class paramfunction(_data):

    defaultstyles = defaultlines

    def __init__(self, varname, min, max, expression, title=_notitle, points=100, context={}):
        if context.has_key(varname):
            raise ValueError("varname in context")
        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        varlist, expression = expression.split("=")
        expression = compile(expression.strip(), __file__, "eval")
        keys = [key.strip() for key in varlist.split(",")]
        self.columns = dict([(key, []) for key in keys])
        context = context.copy()
        for i in range(points):
            param = min + (max-min)*i / (points-1.0)
            context[varname] = param
            values = eval(expression, _mathglobals, context)
            for key, value in zip(keys, values):
                self.columns[key].append(value)
            if len(keys) != len(values):
                raise ValueError("unpack tuple of wrong size")
        self.columnnames = self.columns.keys()
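
# A minimal usage sketch (not part of the original module); the parameter name,
# range and expression are arbitrary (pi is provided by _mathglobals):
#     d = graph.data.paramfunction("k", 0, 2*pi, "x, y = cos(3*k), sin(5*k)")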


class paramfunctionxy(paramfunction):

    def __init__(self, f, min, max, **kwargs):
        paramfunction.__init__(self, "t", min, max, "x, y = f(t)", context={"f": f}, **kwargs)
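
# A minimal usage sketch (not part of the original module); the callable must
# return an (x, y) tuple for each parameter value:
#     import math
#     d = graph.data.paramfunctionxy(lambda t: (t*math.cos(t), t*math.sin(t)), 0, 10)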