arct to arc/arcn conversion has been fixed
[PyX/mjg.git] / pyx / graph / data.py
blob3b73b3e1abfaec1f86e089195c3d1088ae6b80a6
1 #!/usr/bin/env python
2 # -*- coding: ISO-8859-1 -*-
5 # Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
6 # Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
7 # Copyright (C) 2002-2004 André Wobst <wobsta@users.sourceforge.net>
9 # This file is part of PyX (http://pyx.sourceforge.net/).
11 # PyX is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # PyX is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with PyX; if not, write to the Free Software
23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 import math, re, ConfigParser, struct, warnings
27 from pyx import mathtree, text
28 from pyx.style import linestyle
29 from pyx.graph import style
# Make enumerate() available on interpreters that do not provide it.
try:
    enumerate
except NameError:
    # fallback implementation for Python 2.2 and below
    def enumerate(list):
        """Return a list of (index, item) pairs for the given sequence."""
        indices = xrange(len(list))
        return zip(indices, list)
# Make dict() available on interpreters that do not provide it.
try:
    dict
except NameError:
    # fallback implementation for Python 2.1
    def dict(items):
        """Build a dictionary from a sequence of (key, value) pairs."""
        mapping = {}
        for pair in items:
            key, value = pair
            mapping[key] = value
        return mapping
class _data:
    """graph data interface

    Graph data is organized in columns; a column is identified either by a
    string (its name) or by an integer (its number). Each row of the
    resulting table describes one data point.

    Apart from the constructor, no method should modify self or its
    attributes, because one data instance might be shared between several
    graphs simultaneously.

    The instance variable columns is a dictionary mapping column names to
    the data of the column (i.e. to a list). It contains the static columns
    only, that is those known at construction time. Data with numbered
    columns additionally provides the column data via the list columndata.
    For other data the columndata list should be missing, so that accessing
    a column by number fails.

    The instance variables title and defaultstyles hold the data title and
    the default styles (a list of styles), respectively.
    """

    def columnnames(self, graph):
        """return a list of column names

        At the moment the column names might depend on the axes names. This
        dynamic nature is subject to removal in the future; the method
        could then be replaced by an instance variable initialized in the
        constructor.

        The result will be self.columns.keys() plus the keys of the
        dynamic columns, but the latter can only be determined after the
        static axes ranges have been fixed. On the other hand the column
        names are needed already while the sharedata and privatedata of
        the styles are initialized.
        """
        names = self.columns.keys()
        return names

    def dynamiccolumns(self, graph):
        """create and return dynamic columns data

        Returns dynamic data matching the given axes (the axes range and
        other axes data might be used). The return value is a dictionary
        similar to the columns instance variable. This base implementation
        provides no dynamic columns.
        """
        nodynamiccolumns = {}
        return nodynamiccolumns
class list(_data):
    """Graph data from a list of points

    Each entry of points is one data point (a sequence of values); all
    points must have the same number of values. The values at position n
    of all points form column number n.
    """

    defaultstyles = [style.symbol()]

    def __init__(self, points, title="user provided list", addlinenumbers=1, **columns):
        """create the data from a list of points

        - points is a sequence of equally sized sequences
        - title is stored as the data title
        - when addlinenumbers is set, a column containing the numbers
          1, 2, ... is inserted as column number 0 in front of the point
          columns
        - each further keyword argument names a column; its value is the
          column number the name refers to

        A ValueError is raised when the points differ in length or when a
        column number exceeds the number of available columns.
        """
        if len(points):
            l = len(points[0])
            self.columndata = [[x] for x in points[0]]
            for point in points[1:]:
                if l != len(point):
                    raise ValueError("different number of columns per point")
                for i, x in enumerate(point):
                    self.columndata[i].append(x)
            for v in columns.values():
                # with line numbers there are l+1 columns (0..l), without
                # them only the columns 0..l-1 exist
                if abs(v) > l or (not addlinenumbers and abs(v) == l):
                    raise ValueError("column number bigger than number of columns")
            if addlinenumbers:
                self.columndata = [range(1, len(points) + 1)] + self.columndata
            self.columns = dict([(key, self.columndata[i]) for key, i in columns.items()])
        else:
            # no points at all: every named column is empty; we need to
            # iterate over the items, since iterating over the dictionary
            # itself yields the keys only and the unpacking would fail
            self.columns = dict([(key, []) for key, i in columns.items()])
        self.title = title
        self.defaultstyles = [style.symbol()]
122 ##############################################################
123 # math tree enhanced by column number variables
124 ##############################################################
class MathTreeFuncCol(mathtree.MathTreeFunc1):
    """math tree function "_column_" referring to a column by its number

    The single argument of the function is an expression; its value
    (converted to an integer) is the number of the column.
    """

    def __init__(self, *args):
        funcargs = ("_column_",) + args
        mathtree.MathTreeFunc1.__init__(self, *funcargs)

    def VarList(self):
        # VarList is misused here:
        # - the instance itself is returned instead of a variable name
        # - ColumnNameAndNumber must have been called before the expression
        #   is calculated (once is enough when the context is limited to
        #   externally defined variables, otherwise it has to be called
        #   each time)
        return [self]

    def ColumnNameAndNumber(_hidden_self, **args):
        # the first parameter is not called self to keep that name
        # available as a keyword in args
        column = int(_hidden_self.Args[0].Calc(**args))
        name = "_column_%i" % column
        _hidden_self.varname = name
        return name, column

    def __str__(self):
        # varname is available after ColumnNameAndNumber was called only
        return self.varname

    def Calc(_hidden_self, **args):
        # the column value is looked up by the variable name prepared in
        # ColumnNameAndNumber
        value = args[_hidden_self.varname]
        return value
# function list for the math tree parser: the default math tree functions
# extended by the column access function MathTreeFuncCol
MathTreeFuncsWithCol = mathtree.DefaultMathTreeFuncs + [MathTreeFuncCol]
class columntree:
    """wrapper around a math tree separating column variables

    Column variables are MathTreeFuncCol instances and variables whose
    names start with "_column_". Calc and __str__ are taken over from the
    wrapped tree.
    """

    def __init__(self, tree):
        self.__str__ = tree.__str__
        self.Calc = tree.Calc
        self.tree = tree

    def VarList(self):
        # returns a list of regular variables (strings) like the original
        # mathtree; column variables are skipped
        names = []
        for var in self.tree.VarList():
            if isinstance(var, MathTreeFuncCol):
                continue
            if var[:8] == "_column_":
                continue
            names.append(var)
        return names

    def columndict(_hidden_self, **context):
        # returns a dictionary of column names (keys) and column numbers
        # (values); context is used to evaluate column number expressions
        mapping = {}
        for var in _hidden_self.tree.VarList():
            if not isinstance(var, MathTreeFuncCol):
                if var[:8] == "_column_":
                    # the column number is part of the variable name
                    mapping[var] = int(var[8:])
                continue
            name, number = var.ColumnNameAndNumber(**context)
            mapping[name] = number
        return mapping
class dataparser(mathtree.parser):
    """mathtree parser enhanced by column handling

    The parse method returns a columntree instead of a regular tree.
    """

    def __init__(self, MathTreeFuncs=MathTreeFuncsWithCol, **kwargs):
        kwargs["MathTreeFuncs"] = MathTreeFuncs
        mathtree.parser.__init__(self, **kwargs)

    def parse(self, expr):
        # "$" is the short notation for a column access; it is replaced by
        # the "_column_" prefix before the expression is parsed
        prepared = expr.replace("$", "_column_")
        plaintree = mathtree.parser.parse(self, prepared)
        return columntree(plaintree)
186 ##############################################################
# marker class used as the default value of title parameters; it is compared
# by identity ("title is _notitle") to detect that no title was passed
class _notitle:
    pass
class data(_data):
    "creates a new data set out of an existing data set"

    def __init__(self, data, title=_notitle, parser=dataparser(), context={}, copy=1, **columns):
        """create a data set based on the data set data

        - title is the title of the new data; when omitted, a title is
          built out of the title of data and the columns arguments
        - parser is used to parse mathematical expressions
        - context is a dictionary of further variables available in the
          expressions; it is not modified
        - when copy is set, all columns of data whose names are not
          defined by the columns arguments are taken over
        - each further keyword argument defines a column of the new data;
          its value is either a column name of data, a column number of
          data or a mathematical expression; within an expression columns
          are accessible by name and by $<column number>

        A ValueError is raised when an expression contains a variable,
        which is neither a column name nor part of context.
        """
        # build a nice title
        if title is _notitle:
            items = columns.items()
            items.sort() # we want sorted items (otherwise they would be unpredictable scrambled)
            self.title = "%s: %s" % (data.title,
                                     ", ".join(["%s=%s" % (text.escapestring(key),
                                                           text.escapestring(value))
                                                for key, value in items]))
        else:
            self.title = title

        self.orgdata = data
        self.defaultstyles = self.orgdata.defaultstyles

        # analyse the **columns argument
        self.columns = {}
        for columnname, value in columns.items():
            # try value as a column name; only lookup failures are
            # ignored, other exceptions are not swallowed
            try:
                self.columns[columnname] = self.orgdata.columns[value]
            except (KeyError, TypeError):
                pass
            # try value as a column number; columndata might be missing
            # (AttributeError), value might be out of range (IndexError)
            # or not be an integer (TypeError)
            try:
                self.columns[columnname] = self.orgdata.columndata[value]
            except (AttributeError, IndexError, TypeError):
                pass
            # value was not a valid column identifier
            if not self.columns.has_key(columnname):
                # take it as a mathematical expression
                tree = parser.parse(value)
                columndict = tree.columndict(**context)
                vars = {}
                for var, columnnumber in columndict.items():
                    # column data accessed via $<column number>
                    vars[var] = self.orgdata.columndata[columnnumber]
                for var in tree.VarList():
                    try:
                        # column data accessed via the name of the column
                        vars[var] = self.orgdata.columns[var]
                    except (KeyError, ValueError):
                        # other data available in context
                        if var not in context.keys():
                            raise ValueError("undefined variable '%s'" % var)
                newdata = []
                usevars = context.copy() # do not modify context, use a copy vars instead
                # the number of data points is taken from an arbitrary
                # existing column
                if self.orgdata.columns:
                    key, columndata = self.orgdata.columns.items()[0]
                    count = len(columndata)
                elif self.orgdata.columndata:
                    count = len(self.orgdata.columndata[0])
                else:
                    count = 0
                for i in xrange(count):
                    # insert column data as prepared in vars
                    for var, columndata in vars.items():
                        usevars[var] = columndata[i]
                    # evaluate expression
                    try:
                        newdata.append(tree.Calc(**usevars))
                    except (ArithmeticError, ValueError):
                        # the point is marked as invalid
                        newdata.append(None)
                    # we could also do:
                    #   point[newcolumnnumber] = eval(str(tree), vars)

                    # XXX: It might happen, that the evaluation of the expression
                    #      seems to work, but the result is NaN/Inf/-Inf. This
                    #      is highly platform dependent.

                self.columns[columnname] = newdata

        if copy:
            # copy other, non-conflicting column names
            for columnname, columndata in self.orgdata.columns.items():
                if not self.columns.has_key(columnname):
                    self.columns[columnname] = columndata

    def getcolumnpointsindex(self, column):
        "returns the data of the column named column"
        return self.columns[column]
# cache of the data read from files; the keys are created by
# file.getcachekey out of the filename and the parse settings
filecache = {}

class file(data):
    """graph data read from a text file

    Each non-empty, non-comment line of the file is a data point; its
    whitespace separated (or quoted) entries are the column values. The
    last comment line in front of the first data point defines the column
    names.
    """

    defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
    defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
    defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")

    def splitline(self, line, stringpattern, columnpattern, tofloat=1):
        """returns a list created out of the string line
        - matches stringpattern and columnpattern, adds the first group of that
          match to the result and removes those matches until the line is empty
        - when stringpattern matched, the result is always kept as a string
        - when columnpattern matched and tofloat is true, a conversion to a float
          is tried; when this conversion fails, the string is kept"""
        result = []
        # try to gain speed by skip matching regular expressions
        if line.find('"')!=-1 or \
           stringpattern is not self.defaultstringpattern or \
           columnpattern is not self.defaultcolumnpattern:
            while len(line):
                match = stringpattern.match(line)
                if match:
                    result.append(match.groups()[0])
                    line = line[match.end():]
                else:
                    match = columnpattern.match(line)
                    if tofloat:
                        try:
                            result.append(float(match.groups()[0]))
                        except (TypeError, ValueError):
                            result.append(match.groups()[0])
                    else:
                        result.append(match.groups()[0])
                    line = line[match.end():]
        else:
            # default patterns and no quotes: a plain split is sufficient
            if tofloat:
                try:
                    return [float(r) for r in line.split()]
                except (TypeError, ValueError):
                    # at least one entry is not a number: convert the
                    # entries one by one and keep the failing ones as strings
                    result = []
                    for r in line.split():
                        try:
                            result.append(float(r))
                        except (TypeError, ValueError):
                            result.append(r)
            else:
                return line.split()
        return result

    def getcachekey(self, *args):
        "returns a string identifying the combination of the arguments"
        return ":".join([str(x) for x in args])

    def __init__(self, filename,
                 commentpattern=defaultcommentpattern,
                 stringpattern=defaultstringpattern,
                 columnpattern=defaultcolumnpattern,
                 skiphead=0, skiptail=0, every=1,
                 **kwargs):
        """read data from a file
        - filename is either a string (the file is opened, read and
          closed; the result is cached) or a file-like object providing
          a readlines method (the result is not cached)
        - commentpattern matches the beginning of comment lines
        - stringpattern and columnpattern are passed to splitline
        - the first skiphead data lines are skipped; from there on only
          every every-th data line is used; finally, when skiptail is not
          smaller than every, skiptail/every (integer division) data
          points are removed from the end
        - further keyword arguments are passed to the constructor of data"""

        def readfile(file, title, self=self, commentpattern=commentpattern, stringpattern=stringpattern, columnpattern=columnpattern, skiphead=skiphead, skiptail=skiptail, every=every):
            # the settings are passed by default arguments to not depend
            # on nested scopes
            columns = []
            columndata = []
            linenumber = 0
            maxcolumns = 0
            for line in file.readlines():
                line = line.strip()
                match = commentpattern.match(line)
                if match:
                    # comments in front of the data might contain the
                    # column names; the last one wins
                    if not len(columndata):
                        columns = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
                else:
                    linedata = []
                    for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
                        linedata.append(value)
                    if len(linedata):
                        if linenumber >= skiphead and not ((linenumber - skiphead) % every):
                            # column 0 contains the number of the data line
                            linedata = [linenumber + 1] + linedata
                            if len(linedata) > maxcolumns:
                                maxcolumns = len(linedata)
                            columndata.append(linedata)
                        linenumber += 1
            if skiptail >= every:
                skip, x = divmod(skiptail, every)
                del columndata[-skip:]
            # fill up short lines to get the same number of columns in
            # all data points
            for i in xrange(len(columndata)):
                if len(columndata[i]) != maxcolumns:
                    columndata[i].extend([None]*(maxcolumns-len(columndata[i])))
            return list(columndata, title=title, addlinenumbers=0,
                        **dict([(column, i+1) for i, column in enumerate(columns[:maxcolumns-1])]))

        try:
            filename.readlines
        except AttributeError:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
            if not filecache.has_key(cachekey):
                f = open(filename)
                try:
                    filecache[cachekey] = readfile(f, filename)
                finally:
                    # do not leave the file open, we read it completely
                    f.close()
            data.__init__(self, filecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
# cache of the data read from config-like files; the keys are the filenames
conffilecache = {}

class conffile(data):
    "graph data read from a config-like file"

    def __init__(self, filename, **kwargs):
        """read data from a config-like file
        - filename is either a string (the file is opened, read and
          closed; the result is cached) or a file-like object providing
          a readlines method (the result is not cached)
        - each row is defined by a section in the config-like file (see
          config module description)
        - the columns for each row are defined by lines in the section file;
          the option entries identify and name the columns
        - further keyword arguments are passed to the constructor of data,
          keyword arguments data and titles excluded"""

        def readfile(file, title):
            config = ConfigParser.ConfigParser()
            config.optionxform = str # keep the case of the option names
            config.readfp(file)
            sections = config.sections()
            sections.sort()
            columndata = [None]*len(sections)
            maxcolumns = 1
            columns = {}
            for i in xrange(len(sections)):
                # column 0 contains the section name
                point = [sections[i]] + [None]*(maxcolumns-1)
                for option in config.options(sections[i]):
                    value = config.get(sections[i], option)
                    try:
                        value = float(value)
                    except (TypeError, ValueError):
                        # not a number -> keep the string
                        pass
                    try:
                        index = columns[option]
                    except KeyError:
                        # a new option creates a new column
                        columns[option] = maxcolumns
                        point.append(value)
                        maxcolumns += 1
                    else:
                        point[index] = value
                columndata[i] = point
            # sections read before a new option showed up have less
            # entries; fill them up, since all points need to have the
            # same number of columns
            for point in columndata:
                if len(point) < maxcolumns:
                    point.extend([None]*(maxcolumns-len(point)))
            # wrap result into a data instance to remove column numbers
            result = data(list(columndata, addlinenumbers=0, **columns), title=title)
            # ... but reinsert sections as linenumbers
            result.columndata = [[x[0] for x in columndata]]
            return result

        try:
            filename.readlines
        except AttributeError:
            # not a file-like object -> open it
            if not conffilecache.has_key(filename):
                f = open(filename)
                try:
                    conffilecache[filename] = readfile(f, filename)
                finally:
                    # do not leave the file open, we read it completely
                    f.close()
            data.__init__(self, conffilecache[filename], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
# cache of the data read from cbd files; the keys are created by
# cbdfile.getcachekey out of the filename and the rank limits
cbdfilecache = {}

class cbdfile(data):
    """graph data read from a binary cbd file

    The file consists of a header, a segment dictionary and the segments.
    Each segment is a sequence of points; the segments are joined into a
    two column data set with a (None, None) entry between segments.
    """

    def getcachekey(self, *args):
        "returns a string identifying the combination of the arguments"
        return ":".join([str(x) for x in args])

    def __init__(self, filename, minrank=None, maxrank=None, **kwargs):
        """read data from a cbd file
        - filename is either a string (the file is opened in binary mode,
          read and closed; the result is cached) or a file-like object
          providing a readlines method (the result is not cached)
        - when minrank and/or maxrank are not None, segments with a rank
          outside of those limits are skipped
        - further keyword arguments are passed to the constructor of data

        A ValueError is raised when the file does not start with the
        expected magic number."""

        class cbdhead:
            # file header (40 bytes, little-endian)

            def __init__(self, file):
                (self.magic,
                 self.dictaddr,
                 self.segcount,
                 self.segsize,
                 self.segmax,
                 self.fill) = struct.unpack("<5i20s", file.read(40))
                if self.magic != 0x20770002:
                    raise ValueError("bad magic number")

        class segdict:
            # segment dictionary entry (28 bytes, little-endian)

            def __init__(self, file, i):
                self.index = i
                (self.segid,
                 self.maxlat,
                 self.minlat,
                 self.maxlong,
                 self.minlong,
                 self.absaddr,
                 self.nbytes,
                 self.rank) = struct.unpack("<6i2h", file.read(28))

        class segment:
            # segment data located at the address given by the segment
            # dictionary entry sd: an origin followed by nstrokes strokes,
            # each stroke being an offset to the previous point

            def __init__(self, file, sd):
                file.seek(sd.absaddr)
                (self.orgx,
                 self.orgy,
                 self.id,
                 self.nstrokes,
                 self.dummy) = struct.unpack("<3i2h", file.read(16))
                oln, olt = self.orgx, self.orgy
                self.points = [(olt, oln)]
                for i in range(self.nstrokes):
                    c1, c2 = struct.unpack("2c", file.read(2))
                    if ord(c2) & 0x40:
                        # short stroke: both offsets are stored in the
                        # two bytes already read
                        if c1 > "\177":
                            dy = ord(c1) - 256
                        else:
                            dy = ord(c1)
                        if c2 > "\177":
                            dx = ord(c2) - 256
                        else:
                            dx = ord(c2) - 64
                    else:
                        # long stroke: six further bytes are needed to
                        # build two 32 bit integers
                        c3, c4, c5, c6, c7, c8 = struct.unpack("6c", file.read(6))
                        if c2 > "\177":
                            c2 = chr(ord(c2) | 0x40)
                        dx, dy = struct.unpack("<2i", c3+c4+c1+c2+c7+c8+c5+c6)
                    oln += dx
                    olt += dy
                    self.points.append((olt, oln))
                sd.nstrokes = self.nstrokes

        def readfile(file, title):
            h = cbdhead(file)
            file.seek(h.dictaddr)
            sds = [segdict(file, i+1) for i in range(h.segcount)]
            sbs = [segment(file, sd) for sd in sds]

            # remove jumps at long +/- 180
            for sd, sb in zip(sds, sbs):
                if sd.minlong < -150*3600 and sd.maxlong > 150*3600:
                    for i, (lat, long) in enumerate(sb.points):
                        if long < 0:
                            sb.points[i] = lat, long + 360*3600

            columndata = []
            for sd, sb in zip(sds, sbs):
                if ((minrank is None or sd.rank >= minrank) and
                    (maxrank is None or sd.rank <= maxrank)):
                    if columndata:
                        # separate the segments by an invalid point
                        columndata.append((None, None))
                    # the file values are divided by 3600 (presumably a
                    # conversion from arc seconds to degrees)
                    columndata.extend([(long/3600.0, lat/3600.0)
                                       for lat, long in sb.points])

            result = list(columndata, title=title)
            result.defaultstyles = [style.line()]
            return result


        try:
            filename.readlines
        except AttributeError:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, minrank, maxrank)
            if not cbdfilecache.has_key(cachekey):
                f = open(filename, "rb")
                try:
                    cbdfilecache[cachekey] = readfile(f, filename)
                finally:
                    # do not leave the file open, we read all we need
                    f.close()
            data.__init__(self, cbdfilecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
class function(_data):
    """graph data created by evaluating a function

    The function values are calculated in dynamiccolumns, since the range
    of the variable might be taken from the graph axis.
    """

    defaultstyles = [style.line()]

    assignmentpattern = re.compile(r"\s*([a-z_][a-z0-9_]*)\s*\(\s*([a-z_][a-z0-9_]*)\s*\)\s*=", re.IGNORECASE)

    def __init__(self, expression, title=_notitle, min=None, max=None,
                 points=100, parser=mathtree.parser(), context={}):
        """create function data
        - expression is a string like "y(x)=..."; the deprecated form
          "y=..." leaves the variable open until columnnames is called
        - title is the title of the data; the expression is used when
          title is omitted
        - min and max limit the range of the variable; when None, the
          range of the axis of the variable is used
        - points is the number of points to be calculated
        - parser is used to parse the expression
        - context is a dictionary of further variables available in the
          expression; it is copied"""

        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        self.min = min
        self.max = max
        self.numberofpoints = points
        self.context = context.copy() # be save on late evaluations
        m = self.assignmentpattern.match(expression)
        if m:
            self.yname, self.xname = m.groups()
            expression = expression[m.end():]
        else:
            warnings.warn("implicit variables are deprecated, use y(x)=... and the like", DeprecationWarning)
            self.xname = None
            self.yname, expression = [x.strip() for x in expression.split("=")]
        self.mathtree = parser.parse(expression)
        self.columns = {}

    def columnnames(self, graph):
        """returns the names of the variable and the function value

        For the deprecated implicit form the variable is searched in the
        expression: it is the single variable matching an axis name of
        graph (a ValueError is raised when there is none or more than
        one). The variable found is stored in self.xname."""
        if self.xname is None:
            for xname in self.mathtree.VarList():
                if xname in graph.axes.keys():
                    if self.xname is None:
                        self.xname = xname
                    else:
                        raise ValueError("multiple variables found")
            if self.xname is None:
                raise ValueError("no variable found")
        return [self.xname, self.yname]

    def dynamiccolumns(self, graph):
        """calculates and returns the columns of the variable and the
        function values

        The points are equally spaced between min and max (equally spaced
        in the logarithm on a logarithmic axis). Function values, which
        can not be calculated, are set to None."""
        dynamiccolumns = {self.xname: [], self.yname: []}

        xaxis = graph.axes[self.xname]
        from pyx.graph.axis import logarithmic
        logaxis = isinstance(xaxis.axis, logarithmic)
        if self.min is not None:
            min = self.min
        else:
            min = xaxis.data.min
        if self.max is not None:
            max = self.max
        else:
            max = xaxis.data.max
        if logaxis:
            min = math.log(min)
            max = math.log(max)
        # evaluate on a copy: self and its attributes are readonly outside
        # of the constructor, since the instance might be shared between
        # several graphs
        context = self.context.copy()
        for i in range(self.numberofpoints):
            if self.numberofpoints > 1:
                x = min + (max-min)*i / (self.numberofpoints-1.0)
            else:
                # a single point is placed at min (prevents a division by zero)
                x = min
            if logaxis:
                x = math.exp(x)
            dynamiccolumns[self.xname].append(x)
            context[self.xname] = x
            try:
                y = self.mathtree.Calc(**context)
            except (ArithmeticError, ValueError):
                y = None
            dynamiccolumns[self.yname].append(y)
        return dynamiccolumns
class paramfunction(_data):
    """graph data created by evaluating a parametric function

    All columns are calculated in the constructor.
    """

    defaultstyles = [style.line()]

    def __init__(self, varname, min, max, expression, title=_notitle, points=100, parser=mathtree.parser(), context={}):
        """create parametric function data
        - varname is the name of the parameter
        - min and max are the limits of the parameter range
        - expression is a string like "x, y = ..., ..."; the comma
          separated names on the left side become the column names, the
          right side needs to provide one expression for each name (a
          ValueError is raised otherwise)
        - title is the title of the data; the expression is used when
          title is omitted
        - points is the number of equally spaced parameter values
        - parser is used to parse the right side of the expression
        - context is a dictionary of further variables available in the
          expressions; it is not modified

        Values, which can not be calculated, are set to None."""
        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        varlist, expressionlist = expression.split("=")
        keys = [key.strip() for key in varlist.split(",")]
        mathtrees = parser.parse(expressionlist)
        if len(keys) != len(mathtrees):
            raise ValueError("unpack tuple of wrong size")
        self.columns = dict([(key, []) for key in keys])
        context = context.copy()
        for i in range(points):
            if points > 1:
                param = min + (max-min)*i / (points-1.0)
            else:
                # a single point is placed at min (prevents a division by zero)
                param = min
            context[varname] = param
            # the loop variable is not called mathtree to not hide the
            # mathtree module
            for key, tree in zip(keys, mathtrees):
                try:
                    self.columns[key].append(tree.Calc(**context))
                except (ArithmeticError, ValueError):
                    self.columns[key].append(None)