added first version of AFM parser#
[PyX/mjg.git] / pyx / graph / data.py
blob882db92099633eab82f36d751dfa01d26f62ce1b
1 #!/usr/bin/env python
2 # -*- coding: ISO-8859-1 -*-
5 # Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
6 # Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
7 # Copyright (C) 2002-2005 André Wobst <wobsta@users.sourceforge.net>
9 # This file is part of PyX (http://pyx.sourceforge.net/).
11 # PyX is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # PyX is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with PyX; if not, write to the Free Software
23 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 from __future__ import nested_scopes
27 import math, re, ConfigParser, struct, warnings
28 from pyx import text
29 from pyx.style import linestyle
30 from pyx.graph import style
try:
    enumerate
except NameError:
    # Python 2.2 and below have no enumerate builtin; this stand-in returns
    # the (index, item) pairs of a sequence
    def enumerate(list):
        return zip(range(len(list)), list)
try:
    dict
except NameError:
    # Python 2.1 has no dict builtin; this stand-in builds a dictionary out
    # of a sequence of (key, value) pairs
    def dict(items):
        result = {}
        for pair in items:
            key, value = pair
            result[key] = value
        return result
def splitatvalue(value, *splitpoints):
    """locate value relative to a sequence of split points

    Counts how many of the leading split points are strictly smaller than
    value (counting stops at the first split point that is not smaller) and
    returns the tuple (section, value):
    - with no or a single split point, section is that count
    - with two or more split points, section is None when the count is odd
      and half the count when it is even"""
    section = 0
    for splitpoint in splitpoints:
        if not splitpoint < value:
            break
        section += 1
    if len(splitpoints) <= 1:
        return (section, value)
    if section % 2:
        return (None, value)
    return (section >> 1, value)
# names available to every expression evaluated in this module (passed as
# the globals dictionary to eval)
_mathglobals = {}

# constants
_mathglobals["pi"] = math.pi
_mathglobals["e"] = math.e

# functions taken unchanged from the math module (angles in radians)
_mathglobals.update({"sqrt": math.sqrt,
                     "exp": math.exp,
                     "log": math.log,
                     "sin": math.sin,
                     "cos": math.cos,
                     "tan": math.tan,
                     "asin": math.asin,
                     "acos": math.acos,
                     "atan": math.atan})

# sign handling
_mathglobals.update({"neg": lambda x: -x,
                     "abs": lambda x: x < 0 and -x or x,
                     "sgn": lambda x: x < 0 and -1 or 1})

# trigonometric functions working on degrees instead of radians
_mathglobals.update({"sind": lambda x: math.sin(math.pi/180*x),
                     "cosd": lambda x: math.cos(math.pi/180*x),
                     "tand": lambda x: math.tan(math.pi/180*x),
                     "asind": lambda x: 180/math.pi*math.asin(x),
                     "acosd": lambda x: 180/math.pi*math.acos(x),
                     "atand": lambda x: 180/math.pi*math.atan(x)})

# miscellaneous helpers
_mathglobals.update({"norm": lambda x, y: math.hypot(x, y),
                     "splitatvalue": splitatvalue})
86 class _data:
87 """graph data interface
89 Graph data consists in columns, where each column might be identified by a
90 string or an integer. Each row in the resulting table refers to a data
91 point.
93 All methods except for the constructor should consider self and its
94 attributes to be readonly, since the data instance might be shared between
95 several graphs simultaniously.
97 The instance variable columns is a dictionary mapping column names to the
98 data of the column (i.e. to a list). Only static columns (known at
99 construction time) are contained in that dictionary. For data with numbered
100 columns the column data is also available via the list columndata.
101 Otherwise the columndata list should be missing and an access to a column
102 number will fail.
104 The names of all columns (static and dynamic) must be fixed at the constructor
105 and stated in the columnnames dictionary.
107 The instance variable title and defaultstyles contain the data title and
108 the default styles (a list of styles), respectively.
111 def dynamiccolumns(self, graph):
112 """create and return dynamic columns data
114 Returns dynamic data matching the given axes (the axes range and other
115 data might be used). The return value is a dictionary similar to the
116 columns instance variable.
118 return {}
class list(_data):
    """Graph data from a list of points

    Note that within this module the name list refers to this class and
    not to the builtin list type."""

    defaultstyles = [style.symbol()]

    def __init__(self, points, title="user provided list", addlinenumbers=1, **columns):
        """create graph data out of a sequence of points
        - points is a sequence of data points; all points must contain the
          same number of values
        - title is the title of the data
        - when addlinenumbers is set, an additional column numbered 0 is
          created containing the point numbers starting at 1
        - the keyword arguments map column names to column numbers
        - a ValueError is raised when the points differ in length or when a
          column number exceeds the number of columns"""
        if len(points):
            width = len(points[0])
            # transpose the points into one list per column
            self.columndata = [[x] for x in points[0]]
            for point in points[1:]:
                if width != len(point):
                    raise ValueError("different number of columns per point")
                for i, x in enumerate(point):
                    self.columndata[i].append(x)
            for v in columns.values():
                if abs(v) > width or (not addlinenumbers and abs(v) == width):
                    raise ValueError("column number bigger than number of columns")
            if addlinenumbers:
                self.columndata = [range(1, len(points) + 1)] + self.columndata
            self.columns = dict([(key, self.columndata[i]) for key, i in columns.items()])
        else:
            # no points at all: every named column exists, but it is empty
            # (iterate the keys; iterating the dictionary itself and unpacking
            # each key into two names fails for column names)
            self.columns = dict([(key, []) for key in columns.keys()])
        self.columnnames = self.columns.keys()
        self.title = title
        self.defaultstyles = [style.symbol()]
148 class _notitle:
149 pass
# matches a column reference by number within an expression, i.e. a dollar
# sign followed by an (optionally negative) integer like $1 or $-2
_columnintref = re.compile(r"\$(-?\d+)", re.IGNORECASE)

class data(_data):
    "creates a new data set out of an existing data set"

    def __init__(self, data, title=_notitle, context={}, copy=1,
                 replacedollar=1, columncallback="__column__", **columns):
        """create a data set by selecting and calculating columns
        - data is the data instance the new data set is based on
        - title is the title of the new data set; when it is missing, a
          title is built out of the title of data and the sorted keyword
          arguments
        - context is a dictionary of additional names available within
          mathematical expressions (it is copied, not modified)
        - when copy is set, the columns of data not redefined by a keyword
          argument are taken over
        - when replacedollar is set, $<number> within an expression is
          replaced by a call of the column callback with that number and
          any remaining $ is replaced by the name of the column callback
        - columncallback is the name under which the method columncallback
          is available within expressions
        - each keyword argument defines a column of the new data set; its
          value is looked up as a key in data.columns first, as an index
          into data.columndata second, and is taken as a mathematical
          expression otherwise"""
        # build a nice title
        if title is _notitle:
            items = columns.items()
            items.sort() # we want sorted items (otherwise they would be unpredictable scrambled)
            assignments = ["%s=%s" % (text.escapestring(key),
                                      text.escapestring(str(value)))
                           for key, value in items]
            self.title = "%s: %s" % (text.escapestring(data.title or "unkown source"),
                                     ", ".join(assignments))
        else:
            self.title = title

        self.orgdata = data
        self.defaultstyles = self.orgdata.defaultstyles

        # analyse the **columns argument
        self.columns = {}
        for columnname, value in columns.items():
            # search in the columns dictionary
            try:
                self.columns[columnname] = self.orgdata.columns[value]
            except KeyError:
                # search in the columndata list
                try:
                    self.columns[columnname] = self.orgdata.columndata[value]
                except (AttributeError, TypeError):
                    # value was not an valid column identifier
                    # i.e. take it as a mathematical expression
                    self.columns[columnname] = self._evalexpression(value, context, replacedollar, columncallback)

        if copy:
            # copy other, non-conflicting column names
            for columnname, columndata in self.orgdata.columns.items():
                if not self.columns.has_key(columnname):
                    self.columns[columnname] = columndata

        self.columnnames = self.columns.keys()

    def _evalexpression(self, value, context, replacedollar, columncallback):
        "evaluate the expression value for each row of orgdata; returns the list of results"
        if replacedollar:
            # $<number> -> <columncallback>(<number>)
            m = _columnintref.search(value)
            while m:
                value = "%s%s(%s)%s" % (value[:m.start()], columncallback, m.groups()[0], value[m.end():])
                m = _columnintref.search(value)
            # any other $ -> <columncallback>
            value = value.replace("$", columncallback)
        expression = compile(value.strip(), __file__, "eval")
        context = context.copy()
        context[columncallback] = self.columncallback
        # the number of rows is taken from the first column available
        if self.orgdata.columns:
            key, columndata = self.orgdata.columns.items()[0]
            count = len(columndata)
        else:
            # the columndata attribute is optional: without named columns
            # and without numbered columns there are no rows to evaluate
            columndata = getattr(self.orgdata, "columndata", None)
            if columndata:
                count = len(columndata[0])
            else:
                count = 0
        newdata = []
        for i in xrange(count):
            # the column callback reads the current row number from self
            self.columncallbackcount = i
            for key, values in self.orgdata.columns.items():
                context[key] = values[i]
            try:
                newdata.append(eval(expression, _mathglobals, context))
            except (ArithmeticError, ValueError):
                # the expression can not be calculated for this row
                newdata.append(None)
        return newdata

    def columncallback(self, value):
        """return the entry of column value in the row currently evaluated

        The column is looked up by number in orgdata.columndata first and
        by name in orgdata.columns otherwise."""
        try:
            return self.orgdata.columndata[value][self.columncallbackcount]
        except (AttributeError, TypeError, IndexError):
            # no numbered columns, not a column number or number out of range
            return self.orgdata.columns[value][self.columncallbackcount]
# data read from files given by name, stored by the cache key of the file class
filecache = {}

class file(data):
    "graph data read from a text file organized in columns"

    defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
    defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
    defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")

    def splitline(self, line, stringpattern, columnpattern, tofloat=1):
        """returns a list created out of the string line
        - matches stringpattern and columnpattern, adds the first group of that
          match to the result and removes those matches until the line is empty
        - when stringpattern matched, the result is always kept as a string
        - when columnpattern matched and tofloat is true, a conversion to a float
          is tried; when this conversion fails, the string is kept
        - a ValueError is raised when columnpattern does not match or does
          not consume any character of the remaining line"""
        # try to gain speed by skip matching regular expressions
        if line.find('"') == -1 and \
           stringpattern is self.defaultstringpattern and \
           columnpattern is self.defaultcolumnpattern:
            # fast path: no quoted strings and default patterns only, i.e.
            # the columns are separated by whitespace
            if not tofloat:
                return line.split()
            result = []
            for r in line.split():
                try:
                    result.append(float(r))
                except (TypeError, ValueError):
                    result.append(r)
            return result

        result = []
        while len(line):
            match = stringpattern.match(line)
            if match:
                result.append(match.groups()[0])
            else:
                match = columnpattern.match(line)
                if match is None or not match.end():
                    # without this check a pattern failing on the line leads
                    # to an AttributeError or to an endless loop
                    raise ValueError("column pattern does not match the line")
                value = match.groups()[0]
                if tofloat:
                    try:
                        value = float(value)
                    except (TypeError, ValueError):
                        pass
                result.append(value)
            line = line[match.end():]
        return result

    def getcachekey(self, *args):
        "returns a string identifying the arguments a file was read with"
        # NOTE(review): the key uses str() of the compiled patterns; confirm
        # that this identifies custom patterns reliably
        return ":".join([str(x) for x in args])

    def __init__(self, filename,
                 commentpattern=defaultcommentpattern,
                 stringpattern=defaultstringpattern,
                 columnpattern=defaultcolumnpattern,
                 skiphead=0, skiptail=0, every=1,
                 **kwargs):
        """read data from a file
        - filename is either the name of a file or a file-like object, i.e.
          an object providing a readlines attribute; data read from a file
          given by name is cached in filecache
        - lines matching commentpattern are comments; the comment line
          found last before the first data row is split into column names
        - stringpattern and columnpattern are used to split the lines into
          columns (see splitline)
        - the first skiphead non-empty data lines are skipped, afterwards
          each every-th line is used and finally skiptail/every (integer
          division) rows are removed from the end
        - column 0 contains the number of the data line starting at 1
        - further keyword arguments are passed to the constructor of data"""

        def readfile(file, title, self=self, commentpattern=commentpattern, stringpattern=stringpattern, columnpattern=columnpattern, skiphead=skiphead, skiptail=skiptail, every=every):
            columns = []
            columndata = []
            linenumber = 0
            maxcolumns = 0
            for line in file.readlines():
                line = line.strip()
                match = commentpattern.match(line)
                if match:
                    # comments are column titles as long as there is no data
                    if not len(columndata):
                        columns = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
                else:
                    linedata = [value for value in self.splitline(line, stringpattern, columnpattern, tofloat=1)]
                    if len(linedata):
                        if linenumber >= skiphead and not ((linenumber - skiphead) % every):
                            linedata = [linenumber + 1] + linedata
                            if len(linedata) > maxcolumns:
                                maxcolumns = len(linedata)
                            columndata.append(linedata)
                        linenumber += 1
            if skiptail >= every:
                skip, x = divmod(skiptail, every)
                del columndata[-skip:]
            # fill short rows by None to get rows of equal length
            for row in columndata:
                if len(row) != maxcolumns:
                    row.extend([None]*(maxcolumns-len(row)))
            return list(columndata, title=title, addlinenumbers=0,
                        **dict([(column, i+1) for i, column in enumerate(columns[:maxcolumns-1])]))

        try:
            filename.readlines
        except AttributeError:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
            if not filecache.has_key(cachekey):
                f = open(filename)
                try:
                    filecache[cachekey] = readfile(f, filename)
                finally:
                    # do not keep the file open once it is read (or failed)
                    f.close()
            data.__init__(self, filecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
# data read from config-like files given by name, stored by file name
conffilecache = {}

class conffile(data):
    "graph data read from a config-like file"

    def __init__(self, filename, **kwargs):
        """read data from a config-like file
        - filename is either the name of a file or a file-like object, i.e.
          an object providing a readlines attribute; data read from a file
          given by name is cached in conffilecache
        - each row is defined by a section in the config-like file (see
          config module description); the rows are sorted by section name
        - the columns for each row are defined by lines in the section file;
          the option entries identify and name the columns
        - values are converted to floats when possible; an option missing
          in a section results in None
        - further keyword arguments are passed to the constructor of data,
          keyword arguments data and titles excluded"""

        def readfile(file, title):
            config = ConfigParser.ConfigParser()
            config.optionxform = str # keep the option names case sensitive
            config.readfp(file)
            sections = config.sections()
            sections.sort()
            columndata = []
            maxcolumns = 1 # column 0 contains the section name
            columns = {}
            for section in sections:
                point = [section] + [None]*(maxcolumns-1)
                for option in config.options(section):
                    value = config.get(section, option)
                    try:
                        value = float(value)
                    except (TypeError, ValueError):
                        pass
                    try:
                        index = columns[option]
                    except KeyError:
                        # option not seen before -> it becomes a new column
                        columns[option] = maxcolumns
                        point.append(value)
                        maxcolumns += 1
                    else:
                        point[index] = value
                columndata.append(point)
            # sections read before a new option showed up are shorter;
            # fill them by None since list needs rows of equal length
            for point in columndata:
                if len(point) != maxcolumns:
                    point.extend([None]*(maxcolumns-len(point)))
            # wrap result into a data instance to remove column numbers
            result = data(list(columndata, addlinenumbers=0, **columns), title=title)
            # ... but reinsert sections as linenumbers
            result.columndata = [[x[0] for x in columndata]]
            return result

        try:
            filename.readlines
        except AttributeError:
            # not a file-like object -> open it
            if not conffilecache.has_key(filename):
                f = open(filename)
                try:
                    conffilecache[filename] = readfile(f, filename)
                finally:
                    # do not keep the file open once it is read (or failed)
                    f.close()
            data.__init__(self, conffilecache[filename], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
# data read from cbd files given by name, stored by the cache key of the
# cbdfile class
cbdfilecache = {}

class cbdfile(data):
    "graph data read from a binary cbd file"
    # NOTE(review): cbd presumably denotes the compressed binary map data
    # format of the CIA world databank -- confirm

    def getcachekey(self, *args):
        "returns a string identifying the arguments a file was read with"
        return ":".join([str(x) for x in args])

    def __init__(self, filename, minrank=None, maxrank=None, **kwargs):
        """read the segments of a cbd file
        - filename is either the name of a file or a file-like object, i.e.
          an object providing a readlines attribute (the object needs to
          provide read and seek as well); data read from a file given by
          name is cached in cbdfilecache
        - when minrank and/or maxrank are not None, only segments with a
          rank within those limits (inclusive) are used
        - the points of the segments are stored in two numbered columns
          (the values read divided by 3600.0); segments are separated by a
          point (None, None)
        - a ValueError is raised when the file does not start with the
          expected magic number
        - further keyword arguments are passed to the constructor of data"""

        class cbdhead:
            "file header (40 bytes, little endian)"

            def __init__(self, file):
                (self.magic,
                 self.dictaddr,
                 self.segcount,
                 self.segsize,
                 self.segmax,
                 self.fill) = struct.unpack("<5i20s", file.read(40))
                if self.magic != 0x20770002:
                    raise ValueError("bad magic number")

        class segdict:
            "segment dictionary entry (28 bytes, little endian)"

            def __init__(self, file, i):
                self.index = i
                (self.segid,
                 self.maxlat,
                 self.minlat,
                 self.maxlong,
                 self.minlong,
                 self.absaddr,
                 self.nbytes,
                 self.rank) = struct.unpack("<6i2h", file.read(28))

        class segment:
            "segment data located at the address given by its dictionary entry"

            def __init__(self, file, sd):
                file.seek(sd.absaddr)
                (self.orgx,
                 self.orgy,
                 self.id,
                 self.nstrokes,
                 self.dummy) = struct.unpack("<3i2h", file.read(16))
                # the points are stored as offsets to the previous point
                oln, olt = self.orgx, self.orgy
                self.points = [(olt, oln)]
                for i in range(self.nstrokes):
                    c1, c2 = struct.unpack("2c", file.read(2))
                    if ord(c2) & 0x40:
                        # short stroke: both offsets are contained in the
                        # two bytes already read
                        if c1 > "\177":
                            dy = ord(c1) - 256
                        else:
                            dy = ord(c1)
                        if c2 > "\177":
                            dx = ord(c2) - 256
                        else:
                            dx = ord(c2) - 64
                    else:
                        # long stroke: six more bytes are read and reordered
                        # to form two 32 bit integers
                        c3, c4, c5, c6, c7, c8 = struct.unpack("6c", file.read(6))
                        if c2 > "\177":
                            c2 = chr(ord(c2) | 0x40)
                        dx, dy = struct.unpack("<2i", c3+c4+c1+c2+c7+c8+c5+c6)
                    oln += dx
                    olt += dy
                    self.points.append((olt, oln))
                sd.nstrokes = self.nstrokes

        def readfile(file, title):
            h = cbdhead(file)
            file.seek(h.dictaddr)
            sds = [segdict(file, i+1) for i in range(h.segcount)]
            sbs = [segment(file, sd) for sd in sds]

            # remove jumps at long +/- 180
            for sd, sb in zip(sds, sbs):
                if sd.minlong < -150*3600 and sd.maxlong > 150*3600:
                    for i, (lat, long) in enumerate(sb.points):
                        if long < 0:
                            sb.points[i] = lat, long + 360*3600

            columndata = []
            for sd, sb in zip(sds, sbs):
                if ((minrank is None or sd.rank >= minrank) and
                    (maxrank is None or sd.rank <= maxrank)):
                    # separate the segments by an undefined point
                    if columndata:
                        columndata.append((None, None))
                    columndata.extend([(long/3600.0, lat/3600.0)
                                       for lat, long in sb.points])

            result = list(columndata, title=title)
            result.defaultstyles = [style.line()]
            return result

        try:
            filename.readlines
        except AttributeError:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, minrank, maxrank)
            if not cbdfilecache.has_key(cachekey):
                f = open(filename, "rb")
                try:
                    cbdfilecache[cachekey] = readfile(f, filename)
                finally:
                    # do not keep the file open once it is read (or failed)
                    f.close()
            data.__init__(self, cbdfilecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
class function(_data):
    "graph data calculated from a function given as an expression like y(x)=..."

    defaultstyles = [style.line()]

    assignmentpattern = re.compile(r"\s*([a-z_][a-z0-9_]*)\s*\(\s*([a-z_][a-z0-9_]*)\s*\)\s*=", re.IGNORECASE)

    def __init__(self, expression, title=_notitle, min=None, max=None,
                 points=100, context={}):
        """create function data
        - expression is a string like "y(x)=..."; the names in front of the
          equal sign define the names of the two columns
        - title is the title of the data; it defaults to the expression
        - min and max limit the range of the variable; when None, the range
          of the corresponding axis is used
        - points is the number of points to be calculated
        - context is a dictionary of additional names available within the
          expression (it is copied, not modified); it must not contain the
          name of the variable
        - a ValueError is raised when the expression does not start with an
          assignment like y(x)= or when the variable is part of context"""

        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        self.min = min
        self.max = max
        self.numberofpoints = points
        self.context = context.copy() # be save on late evaluations
        m = self.assignmentpattern.match(expression)
        if not m:
            raise ValueError("y(x)=... or similar expected")
        self.yname, self.xname = m.groups()
        expression = expression[m.end():]
        if context.has_key(self.xname):
            raise ValueError("xname in context")
        self.expression = compile(expression.strip(), __file__, "eval")
        # both columns are dynamic, i.e. there are no static columns
        self.columns = {}
        self.columnnames = [self.xname, self.yname]

    def dynamiccolumns(self, graph):
        """calculate the function for the axis of graph named like the variable

        Returns a dictionary containing the values of the variable and of
        the function. The points are distributed equally (on a logarithmic
        axis their logarithms are); a function value which can not be
        calculated due to an ArithmeticError or a ValueError becomes None."""
        xvalues = []
        yvalues = []

        xaxis = graph.axes[self.xname]
        from pyx.graph.axis import logarithmic
        logaxis = isinstance(xaxis.axis, logarithmic)
        if self.min is not None:
            min = self.min
        else:
            min = xaxis.data.min
        if self.max is not None:
            max = self.max
        else:
            max = xaxis.data.max
        if logaxis:
            min = math.log(min)
            max = math.log(max)
        # the instance might be shared between several graphs, hence the
        # variable is set in a copy of the context instead of self.context
        context = self.context.copy()
        for i in range(self.numberofpoints):
            if self.numberofpoints > 1:
                x = min + (max-min)*i / (self.numberofpoints-1.0)
            else:
                # a single point can not be spread over the range
                x = min
            if logaxis:
                x = math.exp(x)
            xvalues.append(x)
            context[self.xname] = x
            try:
                y = eval(self.expression, _mathglobals, context)
            except (ArithmeticError, ValueError):
                y = None
            yvalues.append(y)
        return {self.xname: xvalues, self.yname: yvalues}
class functionxy(function):
    "function data with columns x and y calculated by calling f"

    def __init__(self, f, min=None, max=None, **kwargs):
        """create function data for y(x)=f(x)
        - f is called with the value of x and returns the value of y
        - min, max and further keyword arguments are passed to the
          constructor of function"""
        callcontext = {"f": f}
        function.__init__(self, "y(x)=f(x)", min=min, max=max,
                          context=callcontext, **kwargs)
class paramfunction(_data):
    "graph data calculated from a parametric function"

    defaultstyles = [style.line()]

    def __init__(self, varname, min, max, expression, title=_notitle, points=100, context={}):
        """create data from a parametric function
        - varname is the name of the parameter
        - min and max define the range of the parameter
        - expression is a string like "x, y = ..."; the names in front of
          the first equal sign define the names of the columns and the
          remaining part needs to evaluate to a sequence containing one
          value for each column
        - title is the title of the data; it defaults to the expression
        - points is the number of parameter values equally distributed
          between min and max
        - context is a dictionary of additional names available within the
          expression (it is copied, not modified); it must not contain
          varname
        - a ValueError is raised when varname is part of context or when
          the number of values does not match the number of columns"""
        if context.has_key(varname):
            raise ValueError("varname in context")
        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        # split at the first equal sign only: the right hand side might
        # contain equal signs as well (comparisons, keyword arguments)
        varlist, expression = expression.split("=", 1)
        expression = compile(expression.strip(), __file__, "eval")
        keys = [key.strip() for key in varlist.split(",")]
        self.columns = dict([(key, []) for key in keys])
        context = context.copy()
        for i in range(points):
            if points > 1:
                param = min + (max-min)*i / (points-1.0)
            else:
                # a single point can not be spread over the range
                param = min
            context[varname] = param
            values = eval(expression, _mathglobals, context)
            # check the size before any value of this point is stored
            if len(keys) != len(values):
                raise ValueError("unpack tuple of wrong size")
            for key, value in zip(keys, values):
                self.columns[key].append(value)
        self.columnnames = self.columns.keys()
class paramfunctionxy(paramfunction):
    "parametric function data with columns x and y calculated by calling f"

    def __init__(self, f, min, max, **kwargs):
        """create parametric function data for x, y = f(t)
        - f is called with the value of the parameter t and returns the
          values of x and y
        - min and max define the range of the parameter t
        - further keyword arguments are passed to the constructor of
          paramfunction"""
        callcontext = {"f": f}
        paramfunction.__init__(self, "t", min, max, "x, y = f(t)",
                               context=callcontext, **kwargs)