graph.data: conffile supported restored, unit test restored and cleanup
[PyX/mjg.git] / pyx / graph / data.py
blobd475ea04183d12855f6391a8020d2c38251b0775
1 #!/usr/bin/env python
2 # -*- coding: ISO-8859-1 -*-
5 # Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
6 # Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
7 # Copyright (C) 2002-2004 André Wobst <wobsta@users.sourceforge.net>
9 # This file is part of PyX (http://pyx.sourceforge.net/).
11 # PyX is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # PyX is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with PyX; if not, write to the Free Software
23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 import re, ConfigParser
27 from pyx import mathtree
28 from pyx.graph import style
31 class _Idata:
32 """interface definition of a data object
33 data objects store data arranged in rows and columns"""
35 columns = {}
36 """a dictionary mapping column titles to column numbers"""
38 points = []
39 """column/row data
40 - a list of rows where each row represents a data point
41 - each row contains a list, where each entry of the list represents a value for a column
42 - the number of columns for each data point must match the number of columns
43 - any column enty of any data point might be a float, a string, or None"""
45 title = ""
46 """a string (for printing in PyX, e.g. in a graph key)
47 - None is allowed, which marks the data instance to have no title,
48 e.g. it should be skiped in a graph key etc.
49 - the title does need to be unique"""
51 def getcolumnnumber(self, column):
52 """returns a column number
53 - the column parameter might be an integer to be used as a column number
54 - a column number must be a valid list index (negative values are allowed)
55 - the column parameter might be a string contained in the columns list;
56 to be valid, the string must be unique within the columns list"""
58 def getcolumn(self, column):
59 """returns a column
60 - extracts a column out of self.data and returns it as a list
61 - the column is identified by the parameter column as in getcolumnnumber"""
class _data:
    """common implementation shared by the data classes of this module

    the methods expect the instance to provide
    - self.columns: a dictionary mapping column titles to column numbers
    - self.points: a list of rows, each row being a list of column values
    - self.style: set by setstyle before the other style related
      methods are called"""

    defaultstyle = style.symbol()

    def getcolumnnumber(self, key):
        """returns the column number for key
        - when key is a string, it is stripped and looked up in
          self.columns (a KeyError is raised for an unknown title)
        - otherwise key is taken as the column number itself"""
        try:
            key + ""
        except TypeError:
            # not a string; key + 0 passes numbers through unchanged and
            # raises a TypeError for anything which is not number-like
            return key + 0
        else:
            return self.columns[key.strip()]

    def getcolumn(self, key):
        """returns the column identified by key (see getcolumnnumber)
        as a list containing one entry per data point"""
        columnno = self.getcolumnnumber(key)
        return [point[columnno] for point in self.points]

    def setstyle(self, graph, style):
        """stores style and hands the columns over to it
        - style.setdata is expected to return a dictionary of the
          columns it could not handle
        - a ValueError is raised when this dictionary is not empty"""
        self.style = style
        unhandledcolumns = self.style.setdata(graph, self.columns, self)
        unhandledcolumnkeys = unhandledcolumns.keys()
        if len(unhandledcolumnkeys):
            raise ValueError("style couldn't handle column keys %s" % unhandledcolumnkeys)

    def selectstyle(self, graph, selectindex, selecttotal):
        "forwards the style selection to the style"
        self.style.selectstyle(selectindex, selecttotal, self)

    def adjustaxes(self, graph, step):
        """
        - on step == 0 axes with fixed data should be adjusted
        - on step == 1 the current axes ranges might be used to
          calculate further data (e.g. y data for a function y=f(x)
          where the y range depends on the x range)
        - on step == 2 axes ranges not previously set should be
          updated by data accumulated by step 1"""
        # the data is fixed, hence there is nothing to do in the later steps
        if step == 0:
            self.style.adjustaxes(self.columns.values(), self)

    def draw(self, graph):
        "lets the style draw the data points into graph"
        self.style.drawpoints(graph, self)
class list(_data):
    "creates data out of points"

    def __init__(self, points, title="unknown", maxcolumns=None, addlinenumbers=1, **columns):
        """creates data out of points
        - points is a sequence of rows, each row being a sequence of
          column values; the rows are copied, the sequence passed by
          the caller is left untouched
        - title is stored as the title of the data
        - rows shorter than maxcolumns are filled up by None; when
          maxcolumns is None, the length of the longest row is used
        - when addlinenumbers is set, the line number (starting at 1)
          is inserted as the zeroth column of each row; this extra
          column is not counted by maxcolumns
        - the keyword arguments map column titles to column numbers"""
        if maxcolumns is None and len(points):
            maxcolumns = max([len(point) for point in points])
        # build new rows instead of modifying the list of the caller:
        # otherwise creating data from the same points a second time
        # would add the line numbers twice
        rows = []
        linenumber = 0
        for point in points:
            linenumber += 1
            # copy by a list comprehension: the builtin list is hidden by
            # this class and rows might be tuples as well
            row = [value for value in point]
            row = row + [None] * (maxcolumns - len(row))
            if addlinenumbers:
                row = [linenumber] + row
            rows.append(row)
        self.points = rows
        self.columns = columns
        self.title = title
122 ##############################################################
123 # math tree enhanced by column handling
124 ##############################################################
class MathTreeFuncCol(mathtree.MathTreeFunc1):
    """math tree function accessing the value of a column

    the column number is given by the argument of the function; the
    function is registered under the name "_column_"
    (the first parameter is called _hidden_self in some methods,
    presumably to allow for a keyword argument named self -- confirm)"""

    def __init__(self, *args):
        mathtree.MathTreeFunc1.__init__(self, "_column_", *args)

    def VarList(self):
        # we misuse VarList here:
        # - instead of returning a string, we return this instance itself
        # - before calculating the expression, you must call ColumnNameAndNumber
        #   once (when limiting the context to external defined variables,
        #   otherwise you have to call it each time)
        return [self]

    def ColumnNameAndNumber(_hidden_self, **args):
        # evaluates the argument to get the column number and remembers
        # the variable name the column value will be provided by
        columnnumber = int(_hidden_self.Args[0].Calc(**args))
        columnname = "_column_%i" % columnnumber
        _hidden_self.varname = columnname
        return columnname, columnnumber

    def __str__(self):
        # varname is available after calling ColumnNameAndNumber only
        return self.varname

    def Calc(_hidden_self, **args):
        # the column value is expected to be passed like a variable
        return args[_hidden_self.varname]


# the default math tree functions extended by the column function
MathTreeFuncsWithCol = mathtree.DefaultMathTreeFuncs + [MathTreeFuncCol]
class columntree:
    """wrapper around a parsed math tree adding column handling
    - Calc and the string representation are taken from the wrapped tree
    - VarList returns the regular variables only
    - columndict returns the columns used within the tree"""

    def __init__(self, tree):
        self.tree = tree
        self.Calc = tree.Calc

    def __str__(self):
        # a regular method is used instead of storing tree.__str__ at the
        # instance, since str() ignores instance attributes for new-style
        # classes
        return self.tree.__str__()

    def VarList(self):
        # returns a list of regular variables (strings) like the original mathtree
        return [var
                for var in self.tree.VarList()
                if not isinstance(var, MathTreeFuncCol) and not var.startswith("_column_")]

    def columndict(_hidden_self, **context):
        # returns a dictionary of column names (keys) and column numbers (values)
        # - column functions calculate their column number out of the context
        # - variables named "_column_<number>" carry the number in their name
        columndict = {}
        for var in _hidden_self.tree.VarList():
            if isinstance(var, MathTreeFuncCol):
                name, number = var.ColumnNameAndNumber(**context)
                columndict[name] = number
            elif var.startswith("_column_"):
                columndict[var] = int(var[8:])
        return columndict
class dataparser(mathtree.parser):
    """mathtree parser enhanced by column handling

    parse returns a columntree instead of a regular tree"""

    def __init__(self, MathTreeFuncs=MathTreeFuncsWithCol, **kwargs):
        mathtree.parser.__init__(self, MathTreeFuncs=MathTreeFuncs, **kwargs)

    def parse(self, expr):
        # "$" is replaced by "_column_" before the expression is parsed
        columnexpr = expr.replace("$", "_column_")
        tree = mathtree.parser.parse(self, columnexpr)
        return columntree(tree)
186 ##############################################################
class copycolumn:
    """a helper storage class to mark a new column to be copied
    out of the data of an old column"""

    def __init__(self, newcolumntitle, oldcolumnnumber):
        # - newcolumntitle is the title of the column to be created
        # - oldcolumnnumber is the number of the column to be copied
        self.oldcolumnnumber = oldcolumnnumber
        self.newcolumntitle = newcolumntitle
class mathcolumn:
    """a helper storage class to mark a new column to be created
    by evaluating a mathematical expression"""

    def __init__(self, newcolumntitle, expression, tree, varitems):
        """stores the parameters
        - newcolumntitle is the title of the column to be created
        - expression is a string
        - tree is a parsed mathematical tree, e.g. we can call
          tree.Calc(**vars), where the dict vars maps variable
          names to values
        - varitems is a list of (key, value) pairs, where the key
          is a variable name in the mathematical tree and the
          value is its value"""
        self.varitems = varitems
        self.tree = tree
        self.expression = expression
        self.newcolumntitle = newcolumntitle
class notitle:
    """a helper class to mark that no title was provided

    a title equal to None is a valid input; it needs to be
    distinguished from providing no title at all, in which case a
    title will be created automatically"""
class data(_data):
    "creates a new data set out of an existing data set"

    def __init__(self, data, title=notitle, parser=dataparser(), context={}, **columns):
        """creates a new data set out of the data set data
        - title is the title of the new data set; when not provided,
          a title is built out of the title of data and the columns
        - the keyword arguments define the columns of the new data
          set: the name is the title of the new column, the value is
          either a column identifier of data (see getcolumnnumber) or
          a mathematical expression to be evaluated for each point
        - parser is used to parse the mathematical expressions
        - context provides further variables for the mathematical
          expressions; it is not modified
        - a KeyError is raised when an expression contains a variable
          which is neither a column title nor part of the context"""
        # take over the default style from the original data
        self.defaultstyle = data.defaultstyle

        # build a nice title
        if title is notitle:
            # (the builtin list is hidden within this module)
            items = [item for item in columns.items()]
            items.sort() # we want sorted items (otherwise they would be unpredictable scrambled)
            self.title = data.title + ": " + ", ".join(["%s=%s" % item for item in items])
        else:
            self.title = title

        # analyse the **columns argument
        newcolumns = []
        hasmathcolumns = 0
        for newcolumntitle, columnexpr in columns.items():
            try:
                # try if it is a valid column identifier
                oldcolumnnumber = data.getcolumnnumber(columnexpr)
            except (KeyError, TypeError):
                # if not it should be a mathematical expression
                tree = parser.parse(columnexpr)
                columndict = tree.columndict(**context)
                for var in tree.VarList():
                    try:
                        columndict[var] = data.getcolumnnumber(var)
                    except KeyError:
                        # variables not being columns need to be in the context
                        if var not in context:
                            raise
                newcolumns.append(mathcolumn(newcolumntitle, columnexpr, tree, columndict.items()))
                hasmathcolumns = 1
            else:
                newcolumns.append(copycolumn(newcolumntitle, oldcolumnnumber))

        # ensure to copy the zeroth column (line number)
        # if we already do, place it first again, otherwise add it to the front
        i = 0
        for newcolumn in newcolumns:
            if isinstance(newcolumn, copycolumn) and not newcolumn.oldcolumnnumber:
                # (modifying the list is safe, since we leave the loop)
                newcolumns.pop(i)
                newcolumns.insert(0, newcolumn)
                firstcolumnwithtitle = 0
                break
            i += 1
        else:
            newcolumns.insert(0, copycolumn(None, 0))
            firstcolumnwithtitle = 1

        if hasmathcolumns:
            # new column data needs to be calculated
            variables = context.copy() # do not modify context, use a copy instead
            countcolumns = len(newcolumns)
            self.points = []
            for datapoint in data.points:
                point = [None]*countcolumns
                newcolumnnumber = 0
                for newcolumn in newcolumns:
                    if isinstance(newcolumn, copycolumn):
                        point[newcolumnnumber] = datapoint[newcolumn.oldcolumnnumber]
                    else:
                        # update the variables by the column values of this point
                        # TODO: we could update it once for all varitems
                        for varname, columnnumber in newcolumn.varitems:
                            variables[varname] = datapoint[columnnumber]
                        point[newcolumnnumber] = newcolumn.tree.Calc(**variables)
                        # we could also do:
                        # point[newcolumnnumber] = eval(str(newcolumn.tree), variables)
                    newcolumnnumber += 1
                self.points.append(point)

            # store the column titles
            self.columns = {}
            newcolumnnumber = firstcolumnwithtitle
            for newcolumn in newcolumns[firstcolumnwithtitle:]:
                self.columns[newcolumn.newcolumntitle] = newcolumnnumber
                newcolumnnumber += 1
        else:
            # since only column copies are needed, we can share the original points
            self.points = data.points

            # store the new column titles
            self.columns = {}
            for newcolumn in newcolumns[firstcolumnwithtitle:]:
                self.columns[newcolumn.newcolumntitle] = newcolumn.oldcolumnnumber
# cache of the data read from files; the keys are created by file.getcachekey
filecache = {}

class file(data):
    """creates data out of a text file arranged in lines and columns

    the data read from a file is stored in filecache and reused, when
    the same file is accessed with the same parameters again"""

    defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
    defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
    defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")

    def splitline(self, line, stringpattern, columnpattern, tofloat=1):
        """returns a list created out of the string line
        - matches stringpattern and columnpattern, adds the first group of that
          match to the result and removes those matches until the line is empty
        - when stringpattern matched, the result is always kept as a string
        - when columnpattern matched and tofloat is true, a conversion to a float
          is tried; when this conversion fails, the string is kept"""
        if (line.find('"') != -1 or
            stringpattern is not self.defaultstringpattern or
            columnpattern is not self.defaultcolumnpattern):
            result = []
            while len(line):
                match = stringpattern.match(line)
                if match:
                    result.append(match.groups()[0])
                else:
                    match = columnpattern.match(line)
                    value = match.groups()[0]
                    if tofloat:
                        try:
                            value = float(value)
                        except (TypeError, ValueError):
                            pass # not a number: keep the string
                    result.append(value)
                line = line[match.end():]
            return result

        # try to gain speed by skip matching regular expressions:
        # no strings are contained in the line and the default patterns
        # are used, hence we can split at whitespace
        entries = line.split()
        if not tofloat:
            return entries
        try:
            # (a list comprehension is used to perform the conversion
            # immediately within the try block)
            return [float(entry) for entry in entries]
        except (TypeError, ValueError):
            # at least one entry is not a number: convert them one by one
            result = []
            for entry in entries:
                try:
                    result.append(float(entry))
                except (TypeError, ValueError):
                    result.append(entry)
            return result

    def getcachekey(self, *args):
        "returns a string built out of args to be used as a key in filecache"
        return ":".join([str(x) for x in args])

    def __init__(self, filename,
                 commentpattern=defaultcommentpattern,
                 stringpattern=defaultstringpattern,
                 columnpattern=defaultcolumnpattern,
                 skiphead=0, skiptail=0, every=1,
                 **kwargs):
        """reads data from the file filename
        - lines matching commentpattern are comments; comment lines in
          front of the first data line define the column titles
        - all other non-empty lines are split into columns by splitline
          using stringpattern and columnpattern
        - skiphead data lines are skipped at the beginning, skiptail
          data points are removed at the end and only every every-th
          data line is used
        - the zeroth column of each point contains the number of the
          data line (starting at 1)
        - further keyword arguments are passed to the constructor of data"""
        cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
        if cachekey not in filecache:
            # (the name file is hidden by this class, but open is available)
            infile = open(filename)
            try:
                lines = infile.readlines()
            finally:
                infile.close()
            columns = {}
            points = []
            linenumber = 0
            maxcolumns = 0
            for line in lines:
                line = line.strip()
                match = commentpattern.match(line)
                if match:
                    if not len(points):
                        # NOTE(review): titles of earlier comment lines are
                        # kept unless they are overwritten here -- confirm
                        # that this is intended
                        keys = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
                        i = 0
                        for key in keys:
                            i += 1
                            columns[key] = i
                else:
                    linedata = self.splitline(line, stringpattern, columnpattern, tofloat=1)
                    if len(linedata):
                        if linenumber >= skiphead and not ((linenumber - skiphead) % every):
                            linedata = [linenumber + 1] + linedata
                            if len(linedata) > maxcolumns:
                                maxcolumns = len(linedata)
                            points.append(linedata)
                        linenumber += 1
            if skiptail:
                del points[-skiptail:]
            # list is the data class of this module, not the builtin
            filecache[cachekey] = list(points, title=filename, maxcolumns=maxcolumns, addlinenumbers=0, **columns)
        data.__init__(self, filecache[cachekey], **kwargs)
# cache of the data read from config-like files; the keys are the filenames
conffilecache = {}

class conffile(data):
    "creates data out of a config-like file"

    def __init__(self, filename, **kwargs):
        """read data from a config-like file
        - filename is a string
        - each row is defined by a section in the config-like file (see
          config module description)
        - the columns for each row are defined by lines in the section file;
          the option entries identify and name the columns
        - the zeroth column of each row contains the section name; the
          rows are sorted by the section names
        - option values are converted to floats when possible
        - further keyword arguments are passed to the constructor of data,
          keyword arguments data and titles excluded"""
        cachekey = filename
        # the data has to be looked up in the cache it is stored in below
        if cachekey not in conffilecache:
            config = ConfigParser.ConfigParser()
            config.optionxform = str # keep the option names case sensitive
            conffp = open(filename, "r")
            try:
                config.readfp(conffp)
            finally:
                conffp.close()
            sections = config.sections()
            sections.sort()
            points = []
            maxcolumns = 1
            columns = {}
            for section in sections:
                # prepare a row for all the columns known so far
                point = [section] + [None]*(maxcolumns-1)
                for option in config.options(section):
                    value = config.get(section, option)
                    try:
                        value = float(value)
                    except (TypeError, ValueError):
                        pass # not a number: keep the string
                    try:
                        index = columns[option]
                    except KeyError:
                        # a new option creates a new column at the end; rows
                        # created earlier are filled up by None later on
                        columns[option] = maxcolumns
                        point.append(value)
                        maxcolumns += 1
                    else:
                        point[index] = value
                points.append(point)
            # list is the data class of this module, not the builtin
            conffilecache[cachekey] = list(points, title=filename, maxcolumns=maxcolumns, addlinenumbers=0, **columns)
        data.__init__(self, conffilecache[cachekey], **kwargs)
class function:
    """creates data out of a function given by an expression like "y=f(x)"
    - the part left of the equal sign names the result
    - the part right of the equal sign is a mathematical expression,
      which needs to contain exactly one variable being an axis name
      of the graph"""

    defaultstyle = style.line()

    def __init__(self, expression, title=notitle, min=None, max=None, points=100, parser=mathtree.parser(), context={}):
        """creates a function
        - expression is a string like "y=f(x)"; it needs to contain
          exactly one equal sign
        - title is the title of the data; when not provided, the
          expression is used
        - min and max limit the range of the variable; when None,
          the axis range is used
        - points is the number of points to be calculated
        - parser is used to parse the right hand side of expression
        - context provides further variables for the expression; a
          copy is stored, hence the dictionary of the caller (and the
          shared default) is not modified when evaluating the function"""
        if title is notitle:
            self.title = expression
        else:
            self.title = title
        self.min = min
        self.max = max
        self.nopoints = points
        self.context = context.copy()
        self.result, expression = [x.strip() for x in expression.split("=")]
        self.mathtree = parser.parse(expression)
        self.variable = None

    def setstyle(self, graph, style):
        """stores style, detects the variable and hands the columns to style
        - the variable is the one variable of the expression, which is
          an axis name of graph; a ValueError is raised when no or
          several of them are found
        - a ValueError is raised when style reports unhandled columns"""
        self.style = style
        for variable in self.mathtree.VarList():
            if variable in graph.axes.keys():
                if self.variable is None:
                    self.variable = variable
                else:
                    raise ValueError("multiple variables found")
        if self.variable is None:
            raise ValueError("no variable found")
        self.xaxis = graph.axes[self.variable]
        unhandledcolumns = self.style.setdata(graph, {self.variable: 0, self.result: 1}, self)
        unhandledcolumnkeys = unhandledcolumns.keys()
        if len(unhandledcolumnkeys):
            raise ValueError("style couldn't handle column keys %s" % unhandledcolumnkeys)

    def selectstyle(self, graph, selectindex, selecttotal):
        "forwards the style selection to the style"
        self.style.selectstyle(selectindex, selecttotal, self)

    def adjustaxes(self, graph, step):
        """
        - on step == 0 axes with fixed data should be adjusted
        - on step == 1 the current axes ranges might be used to
          calculate further data (e.g. y data for a function y=f(x)
          where the y range depends on the x range)
        - on step == 2 axes ranges not previously set should be
          updated by data accumulated by step 1"""
        if step == 0:
            # create the values of the variable (column 0)
            xmin, xmax = graph.axes[self.variable].getrange()
            if self.min is not None: xmin = self.min
            if self.max is not None: xmax = self.max
            vmin = self.xaxis.convert(xmin)
            vmax = self.xaxis.convert(xmax)
            self.points = []
            for i in range(self.nopoints):
                # the values are distributed equally in the converted axis
                # values, e.g. not in x
                x = self.xaxis.invert(vmin + (vmax-vmin)*i / (self.nopoints-1.0))
                self.points.append([x])
            self.style.adjustaxes([0], self)
        elif step == 1:
            # calculate the function values (column 1)
            for point in self.points:
                self.context[self.variable] = point[0]
                try:
                    point.append(self.mathtree.Calc(**self.context))
                except (ArithmeticError, ValueError):
                    # the function can not be evaluated at this point
                    point.append(None)
        elif step == 2:
            self.style.adjustaxes([1], self)

    def draw(self, graph):
        "lets the style draw the data points into graph"
        self.style.drawpoints(graph, self)
class paramfunction:
    """creates data out of a parametric function given by an
    expression like "x, y = f(t), g(t)" """

    defaultstyle = style.line()

    def __init__(self, varname, min, max, expression, title=notitle, points=100, parser=mathtree.parser(), context={}):
        """creates a parametric function
        - varname is the name of the parameter
        - min and max are the limits of the parameter range
        - expression is a string like "x, y = f(t), g(t)"; the names
          left of the equal sign become the column titles, the right
          hand side is parsed by parser and has to provide one
          expression for each name
        - title is the title of the data; when not provided, the
          expression is used
        - points is the number of points to be calculated
        - context provides further variables for the expressions; it
          is not modified
        - a ValueError is raised when the number of names and
          expressions differ or when a name occurs several times"""
        if title is notitle:
            self.title = expression
        else:
            self.title = title
        self.varname = varname
        self.min = min
        self.max = max
        self.nopoints = points
        self.expression = {}
        self.mathtrees = {}
        varlist, expressionlist = expression.split("=")
        keys = varlist.split(",")
        mtrees = parser.parse(expressionlist)
        if len(keys) != len(mtrees):
            raise ValueError("unpack tuple of wrong size")
        for i in range(len(keys)):
            key = keys[i].strip()
            if key in self.mathtrees:
                raise ValueError("multiple assignment in tuple")
            self.mathtrees[key] = mtrees[i]
        # calculate the points using a copy of the context: neither the
        # dictionary of the caller nor the shared default gets modified
        variables = context.copy()
        self.points = []
        for i in range(self.nopoints):
            variables[self.varname] = self.min + (self.max-self.min)*i / (self.nopoints-1.0)
            # the column order is the order of self.mathtrees as used in setstyle
            line = []
            for key, tree in self.mathtrees.items():
                line.append(tree.Calc(**variables))
            self.points.append(line)

    def setstyle(self, graph, style):
        """stores style and hands the columns over to it
        - the columns are numbered in the order of self.mathtrees
        - a ValueError is raised when style reports unhandled columns"""
        self.style = style
        columns = {}
        index = 0
        for key in self.mathtrees.keys():
            columns[key] = index
            index += 1
        unhandledcolumns = self.style.setdata(graph, columns, self)
        unhandledcolumnkeys = unhandledcolumns.keys()
        if len(unhandledcolumnkeys):
            raise ValueError("style couldn't handle column keys %s" % unhandledcolumnkeys)

    def selectstyle(self, graph, selectindex, selecttotal):
        "forwards the style selection to the style"
        self.style.selectstyle(selectindex, selecttotal, self)

    def adjustaxes(self, graph, step):
        "adjusts the axes by all columns on step == 0 (the data is fixed)"
        if step == 0:
            self.style.adjustaxes(range(len(self.mathtrees.items())), self)

    def draw(self, graph):
        "lets the style draw the data points into graph"
        self.style.drawpoints(graph, self)