calculate maxcolumns once only
[PyX/mjg.git] / pyx / graph / data.py
blobce20862a695ec1df8d83ef840774c4888a5961d2
1 #!/usr/bin/env python
2 # -*- coding: ISO-8859-1 -*-
5 # Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
6 # Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
7 # Copyright (C) 2002-2004 André Wobst <wobsta@users.sourceforge.net>
9 # This file is part of PyX (http://pyx.sourceforge.net/).
11 # PyX is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # PyX is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with PyX; if not, write to the Free Software
23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 import re, ConfigParser
27 from pyx import mathtree
28 from pyx.graph import style
31 class _Idata:
32 """interface definition of a data object
33 data objects store data arranged in rows and columns"""
35 columns = {}
36 """a dictionary mapping column titles to column numbers"""
38 points = []
39 """column/row data
40 - a list of rows where each row represents a data point
41 - each row contains a list, where each entry of the list represents a value for a column
42 - the number of columns for each data point must match the number of columns
43 - any column enty of any data point might be a float, a string, or None"""
45 title = ""
46 """a string (for printing in PyX, e.g. in a graph key)
47 - None is allowed, which marks the data instance to have no title,
48 e.g. it should be skiped in a graph key etc.
49 - the title does need to be unique"""
51 def getcolumnnumber(self, column):
52 """returns a column number
53 - the column parameter might be an integer to be used as a column number
54 - a column number must be a valid list index (negative values are allowed)
55 - the column parameter might be a string contained in the columns list;
56 to be valid, the string must be unique within the columns list"""
58 def getcolumn(self, column):
59 """returns a column
60 - extracts a column out of self.data and returns it as a list
61 - the column is identified by the parameter column as in getcolumnnumber"""
class _data:
    """Common behaviour of data objects.

    Provides the column lookup on self.columns/self.points and forwards
    the graph related calls to the style attached by setstyle."""

    defaultstyle = style.symbol()

    def getcolumnnumber(self, key):
        """Translate a column identifier into a column number.

        String-like keys (anything a string can be added to) are stripped
        and looked up in self.columns; any other key is returned as
        key + 0."""
        try:
            key + ""
        except:
            # not string-like: treat the key as the column number itself
            return key + 0
        return self.columns[key.strip()]

    def getcolumn(self, key):
        """Return the entries of the column identified by key as a list."""
        index = self.getcolumnnumber(key)
        values = []
        for row in self.points:
            values.append(row[index])
        return values

    def setstyle(self, graph, style):
        """Attach style and hand the columns over to it.

        Raises ValueError when the style reports column keys it could
        not handle."""
        self.style = style
        leftover = self.style.setdata(graph, self.columns, self).keys()
        if len(leftover) != 0:
            raise ValueError("style couldn't handle column keys %s" % leftover)

    def selectstyle(self, graph, selectindex, selecttotal):
        """Forward the style selection to the attached style."""
        self.style.selectstyle(selectindex, selecttotal, self)

    def adjustaxes(self, graph, step):
        """Adjust the axes ranges in up to three steps.

        - on step == 0 axes with fixed data should be adjusted
        - on step == 1 the current axes ranges might be used to
          calculate further data (e.g. y data for a function y=f(x)
          where the y range depends on the x range)
        - on step == 2 axes ranges not previously set should be
          updated by data accumulated by step 1

        Here all data is fixed, hence only step 0 does something."""
        if step != 0:
            return
        self.style.adjustaxes(self.columns.values(), self)

    def draw(self, graph):
        """Let the attached style draw the data points into graph."""
        self.style.drawpoints(graph, self)
class list(_data):
    """creates data out of a list

    - points is a sequence of rows, where each row is a sequence of
      column values
    - rows shorter than the number of columns are filled up by None
    - when the points container or single rows do not support the needed
      modifications (e.g. tuples), list copies are used instead
    - NOTE: mutable containers and rows are modified in place, i.e. the
      sequence passed by the caller is altered when rows are filled up
      or line numbers are added"""

    # This class shadows the builtin list type within this module, and
    # __builtins__ is a dictionary (not a module) in imported modules.
    # Hence we fetch the builtin list type from a list literal.
    _builtinlist = type([])

    def _storepoint(self, points, index, point):
        """stores point at points[index] and returns the container
        - points itself is returned when it accepts the item assignment
        - otherwise a builtin list copy of points is created, modified
          and returned"""
        try:
            points[index] = point
        except Exception:
            # points is not a mutable sequence (e.g. a tuple)
            points = self._builtinlist(points)
            points[index] = point
        return points

    def checkmaxcolumns(self, points, maxcolumns=None):
        """fills up all rows of points to maxcolumns entries by None
        - when maxcolumns is None, the length of the longest row is used
          (zero for empty points)
        - rows already containing maxcolumns or more entries are kept
        - returns the container holding the resulting rows, which is
          points itself whenever points could be modified in place"""
        if maxcolumns is None:
            # the additional zero keeps max from failing on empty points
            maxcolumns = max([len(point) for point in points] + [0])
        for i in range(len(points)):
            missing = maxcolumns - len(points[i])
            if missing > 0:
                try:
                    point = points[i] + [None] * missing
                except Exception:
                    # points[i] is not a list
                    point = self._builtinlist(points[i]) + [None] * missing
                points = self._storepoint(points, i, point)
        return points

    def __init__(self, points, title="user provided list", maxcolumns=None, addlinenumbers=1, **columns):
        """
        - points, maxcolumns: see checkmaxcolumns
        - title is stored as the title of the data
        - when addlinenumbers is set, the line number (starting at 1) is
          inserted as column 0 into each row
        - the keyword arguments columns map column titles to column
          numbers"""
        points = self.checkmaxcolumns(points, maxcolumns)
        if addlinenumbers:
            for i in range(len(points)):
                try:
                    points[i].insert(0, i+1)
                except Exception:
                    # the row can not be extended in place (e.g. a tuple)
                    points = self._storepoint(points, i, [i+1] + self._builtinlist(points[i]))
        self.points = points
        self.columns = columns
        self.title = title
143 ##############################################################
144 # math tree enhanced by column handling
145 ##############################################################
class MathTreeFuncCol(mathtree.MathTreeFunc1):
    """math tree function "_column_" with a single argument

    The function stands for the value of a data column; the column
    number is given by the argument of the function."""

    def __init__(self, *args):
        mathtree.MathTreeFunc1.__init__(self, "_column_", *args)

    def VarList(self):
        # VarList is misused here:
        # - the instance itself is returned instead of a string
        # - ColumnNameAndNumber must be called before the expression is
        #   calculated: once, when the context is limited to externally
        #   defined variables, otherwise each time
        return [self]

    def ColumnNameAndNumber(_hidden_self, **args):
        # the first parameter is not called self; presumably to keep the
        # name self usable within args -- TODO confirm
        columnno = int(_hidden_self.Args[0].Calc(**args))
        name = "_column_%i" % columnno
        _hidden_self.varname = name
        return name, columnno

    def __str__(self):
        # varname is available after a call of ColumnNameAndNumber only
        return self.varname

    def Calc(_hidden_self, **args):
        # the column value is looked up by the variable name
        return args[_hidden_self.varname]


# the default math tree functions extended by the column function
MathTreeFuncsWithCol = mathtree.DefaultMathTreeFuncs + [MathTreeFuncCol]
class columntree:
    """wrapper around a parsed math tree separating column references
    from regular variables

    Calc and __str__ of the wrapped tree are made available directly
    on the instance."""

    def __init__(self, tree):
        self.__str__ = tree.__str__
        self.Calc = tree.Calc
        self.tree = tree

    def VarList(self):
        # returns the regular variables (strings) only, like the original
        # mathtree does; column references are skipped
        names = []
        for var in self.tree.VarList():
            if isinstance(var, MathTreeFuncCol):
                continue
            if var[:8] == "_column_":
                continue
            names.append(var)
        return names

    def columndict(_hidden_self, **context):
        # returns a dictionary mapping column names (keys) to column
        # numbers (values)
        mapping = {}
        for var in _hidden_self.tree.VarList():
            if isinstance(var, MathTreeFuncCol):
                # the column number results from evaluating the argument
                name, number = var.ColumnNameAndNumber(**context)
                mapping[name] = number
                continue
            if var[:8] == "_column_":
                # the column number is part of the variable name
                mapping[var] = int(var[8:])
        return mapping
class dataparser(mathtree.parser):
    """mathtree parser enhanced by column handling

    - "$" within an expression is a shortcut for "_column_"
    - parse returns a columntree instead of a regular tree"""

    def __init__(self, MathTreeFuncs=MathTreeFuncsWithCol, **kwargs):
        mathtree.parser.__init__(self, MathTreeFuncs=MathTreeFuncs, **kwargs)

    def parse(self, expr):
        # expand the column shortcut before the expression gets parsed
        columnexpr = expr.replace("$", "_column_")
        tree = mathtree.parser.parse(self, columnexpr)
        return columntree(tree)
207 ##############################################################
class copycolumn:
    """a helper storage class to mark a new column to be copied
    out of the data of an old column"""

    def __init__(self, newcolumntitle, oldcolumnnumber):
        # - newcolumntitle is the title of the column in the new data
        # - oldcolumnnumber is the number of the column to copy from
        self.oldcolumnnumber = oldcolumnnumber
        self.newcolumntitle = newcolumntitle
class mathcolumn:
    """a helper storage class to mark a new column to be created
    by evaluating a mathematical expression"""

    def __init__(self, newcolumntitle, expression, tree, varitems):
        # - newcolumntitle is the title of the column in the new data
        # - expression is the mathematical expression as a string
        # - tree is the parsed mathematical tree, i.e. we can call
        #   tree.Calc(**vars), where the dictionary vars maps variable
        #   names to values
        # - varitems is a list of (key, value) pairs, where the key is a
        #   variable name in the mathematical tree and the value is
        #   stored along with it
        self.varitems = varitems
        self.tree = tree
        self.expression = expression
        self.newcolumntitle = newcolumntitle
class notitle:
    """marker class used as default value when no title was provided

    A title of None is a valid input, so it can not be used to signal
    a missing title; when this class is found instead, a title will be
    created automatically."""
class data(_data):
    """creates a new data set out of an existing data set

    - data is the source data object; its defaultstyle, title, points
      and getcolumnnumber method are used
    - title becomes the title of the new data set; when omitted, a title
      is built out of the title of the source data and the sorted column
      definitions
    - parser parses those column definitions, which are not valid column
      identifiers of the source data, as mathematical expressions
    - context provides additional variables for the evaluation of
      mathematical expressions; the dictionary itself is not modified
    - each keyword argument defines a column of the new data set: the
      name becomes the column title, the value is either a column
      identifier of the source data (see getcolumnnumber) or a
      mathematical expression"""

    def __init__(self, data, title=notitle, parser=dataparser(), context={}, **columns):
        # take over the default style of the source data
        # (this used to be assigned to a local variable without any effect)
        self.defaultstyle = data.defaultstyle

        # build a nice title
        if title is notitle:
            # we want sorted items (otherwise they would be unpredictably scrambled)
            # note: the name list refers to the data class within this module
            items = [item for item in columns.items()]
            items.sort()
            self.title = data.title + ": " + ", ".join(["%s=%s" % item for item in items])
        else:
            self.title = title

        # analyse the **columns argument
        newcolumns = []
        hasmathcolumns = 0
        for newcolumntitle, columnexpr in columns.items():
            try:
                # try if it is a valid column identifier
                oldcolumnnumber = data.getcolumnnumber(columnexpr)
            except Exception:
                # if not it should be a mathematical expression
                tree = parser.parse(columnexpr)
                columndict = tree.columndict(**context)
                for var in tree.VarList():
                    try:
                        columndict[var] = data.getcolumnnumber(var)
                    except KeyError:
                        # variables missing in the data must be part of the context
                        if var not in context:
                            raise
                newcolumns.append(mathcolumn(newcolumntitle, columnexpr, tree, columndict.items()))
                hasmathcolumns = 1
            else:
                newcolumns.append(copycolumn(newcolumntitle, oldcolumnnumber))

        # ensure to copy the zeroth column (line number)
        # if we already do, place it first again, otherwise add it to the front
        for i in range(len(newcolumns)):
            newcolumn = newcolumns[i]
            if isinstance(newcolumn, copycolumn) and not newcolumn.oldcolumnnumber:
                newcolumns.insert(0, newcolumns.pop(i))
                firstcolumnwithtitle = 0
                break
        else:
            newcolumns.insert(0, copycolumn(None, 0))
            firstcolumnwithtitle = 1

        if hasmathcolumns:
            # new column data needs to be calculated
            variables = context.copy() # do not modify context, use a copy instead
            self.points = [None]*len(data.points)
            countcolumns = len(newcolumns)
            for i in range(len(data.points)):
                datapoint = data.points[i]
                point = [None]*countcolumns
                newcolumnnumber = 0
                for newcolumn in newcolumns:
                    if isinstance(newcolumn, copycolumn):
                        point[newcolumnnumber] = datapoint[newcolumn.oldcolumnnumber]
                    else:
                        # update the variables by the values of the current data point
                        # TODO: we could update it once for all varitems
                        for varname, columnnumber in newcolumn.varitems:
                            variables[varname] = datapoint[columnnumber]
                        point[newcolumnnumber] = newcolumn.tree.Calc(**variables)
                        # we could also do:
                        # point[newcolumnnumber] = eval(str(newcolumn.tree), variables)
                    newcolumnnumber += 1
                self.points[i] = point

            # store the column titles
            self.columns = {}
            newcolumnnumber = firstcolumnwithtitle
            for newcolumn in newcolumns[firstcolumnwithtitle:]:
                self.columns[newcolumn.newcolumntitle] = newcolumnnumber
                newcolumnnumber += 1
        else:
            # since only column copies are needed, we can share the original points
            self.points = data.points

            # store the new column titles
            self.columns = {}
            for newcolumn in newcolumns[firstcolumnwithtitle:]:
                self.columns[newcolumn.newcolumntitle] = newcolumn.oldcolumnnumber
# cache of the data read by the file class; see file.getcachekey for the keys
filecache = {}

class file(data):
    """creates data out of a file containing whitespace separated columns

    - lines matching commentpattern are comments; comment lines in front
      of the first data point define the column titles
    - the parsed content of a file is stored in filecache and reused when
      the same file is requested with the same parameters again"""

    defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
    defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
    defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")

    def splitline(self, line, stringpattern, columnpattern, tofloat=1):
        """returns a list created out of the string line
        - matches stringpattern and columnpattern, adds the first group of that
          match to the result and removes those matches until the line is empty
        - when stringpattern matched, the result is always kept as a string
        - when columnpattern matched and tofloat is true, a conversion to a float
          is tried; when this conversion fails, the string is kept
        - a ValueError is raised when neither of the patterns consumes any
          character of the remaining line (this can happen for user provided
          patterns only and would otherwise never terminate)"""
        # try to gain speed by skip matching regular expressions
        if line.find('"') == -1 and \
           stringpattern is self.defaultstringpattern and \
           columnpattern is self.defaultcolumnpattern:
            words = line.split()
            if not tofloat:
                return words
            try:
                return [float(word) for word in words]
            except (TypeError, ValueError):
                # at least one entry is not a number: convert them one by one
                result = []
                for word in words:
                    try:
                        result.append(float(word))
                    except (TypeError, ValueError):
                        result.append(word)
                return result
        result = []
        while len(line):
            match = stringpattern.match(line)
            if match:
                value = match.groups()[0]
            else:
                match = columnpattern.match(line)
                if match is None:
                    raise ValueError("neither stringpattern nor columnpattern matches %r" % line)
                value = match.groups()[0]
                if tofloat:
                    try:
                        value = float(value)
                    except (TypeError, ValueError):
                        # keep the string
                        pass
            if not match.end():
                raise ValueError("pattern matched an empty string at %r" % line)
            result.append(value)
            line = line[match.end():]
        return result

    def getcachekey(self, *args):
        """returns the string representations of args joined by colons"""
        return ":".join([str(x) for x in args])

    def __init__(self, filename,
                 commentpattern=defaultcommentpattern,
                 stringpattern=defaultstringpattern,
                 columnpattern=defaultcolumnpattern,
                 skiphead=0, skiptail=0, every=1,
                 **kwargs):
        """
        - filename is the name of the file to be read
        - commentpattern, stringpattern, and columnpattern are compiled
          regular expressions, see splitline
        - the first skiphead data lines are skipped, afterwards only each
          every-th data line is used, and finally the last skiptail of
          the data points collected that way are removed
        - the line number (counting non-empty data lines, starting at 1)
          becomes column 0 of each data point
        - further keyword arguments are passed to the constructor of data"""
        cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
        if cachekey not in filecache:
            # the name file refers to this class, hence the name datafile
            datafile = open(filename)
            try:
                lines = datafile.readlines()
            finally:
                # do not leave the file open, not even on read errors
                datafile.close()
            columns = {}
            points = []
            linenumber = 0
            maxcolumns = 0
            for line in lines:
                line = line.strip()
                match = commentpattern.match(line)
                if match:
                    # comments in front of the data define the column titles
                    if not len(points):
                        keys = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
                        i = 0
                        for key in keys:
                            i += 1
                            columns[key] = i
                else:
                    linedata = self.splitline(line, stringpattern, columnpattern, tofloat=1)
                    if len(linedata):
                        if linenumber >= skiphead and not ((linenumber - skiphead) % every):
                            linedata = [linenumber + 1] + linedata
                            if len(linedata) > maxcolumns:
                                maxcolumns = len(linedata)
                            points.append(linedata)
                        linenumber += 1
            if skiptail:
                del points[-skiptail:]
            # note: list is the data class of this module, not the builtin type
            filecache[cachekey] = list(points, title=filename, maxcolumns=maxcolumns, addlinenumbers=0, **columns)
        data.__init__(self, filecache[cachekey], **kwargs)
# cache of the data read by the conffile class; the keys are the filenames
conffilecache = {}

class conffile(data):

    def __init__(self, filename, **kwargs):
        """read data from a config-like file
        - filename is a string
        - each row is defined by a section in the config-like file (see
          config module description)
        - the columns for each row are defined by lines in the section file;
          the option entries identify and name the columns
        - the section name becomes column 0 of each row; the sections are
          sorted by their names
        - option values are converted to floats when possible, otherwise
          the strings are kept
        - the parsed content is stored in conffilecache and reused when
          the same filename is requested again
        - further keyword arguments are passed to the constructor of data,
          keyword arguments data and titles excluded"""
        cachekey = filename
        # the lookup needs to use the cache the result is stored in
        # (filecache was tested here before, which never matched)
        if cachekey not in conffilecache:
            config = ConfigParser.ConfigParser()
            config.optionxform = str
            configfile = open(filename, "r")
            try:
                config.readfp(configfile)
            finally:
                # do not leave the file open, not even on parse errors
                configfile.close()
            sections = config.sections()
            sections.sort()
            points = [None]*len(sections)
            maxcolumns = 1
            columns = {}
            for i in range(len(sections)):
                point = [sections[i]] + [None]*(maxcolumns-1)
                for option in config.options(sections[i]):
                    value = config.get(sections[i], option)
                    try:
                        value = float(value)
                    except (TypeError, ValueError):
                        # not a number: keep the string
                        pass
                    try:
                        index = columns[option]
                    except KeyError:
                        # first occurrence of the option: create a new column
                        columns[option] = maxcolumns
                        point.append(value)
                        maxcolumns += 1
                    else:
                        point[index] = value
                points[i] = point
            # note: list is the data class of this module, not the builtin type;
            # it fills up rows created before all columns were known
            conffilecache[cachekey] = list(points, title=filename, maxcolumns=maxcolumns, addlinenumbers=0, **columns)
        data.__init__(self, conffilecache[cachekey], **kwargs)
class function:
    """creates data out of a function given by a mathematical expression

    - expression is a string of the form "result = formula", e.g.
      "y = sin(x)"; the left side names the result column
    - title is used as the title of the data; when omitted, the
      expression is used
    - min and max optionally limit the range of the variable; where they
      are None, the range of the axis of the variable is used
    - points is the number of points to be calculated
    - parser is used to parse the formula
    - context provides additional variables for the evaluation; a copy
      is stored at construction time"""

    defaultstyle = style.line()

    def __init__(self, expression, title=notitle, min=None, max=None,
                 points=100, parser=mathtree.parser(), context={}):

        if title is notitle:
            self.title = expression
        else:
            self.title = title
        self.min = min
        self.max = max
        self.numberofpoints = points
        self.context = context.copy() # be safe on late evaluations
        parts = [x.strip() for x in expression.split("=")]
        if len(parts) != 2:
            raise ValueError("expression %r is not of the form 'result = formula'" % expression)
        self.result, formula = parts
        self.mathtree = parser.parse(formula)
        self.variable = None

    def setstyle(self, graph, style):
        """attaches style and selects the variable of the function
        - the variable is the one variable of the expression, for which an
          axis of that name exists in graph
        - raises ValueError when no or several such variables are found,
          or when the style could not handle all column keys"""
        self.style = style
        # search in a local name: a variable found by a previous call of
        # setstyle must not be taken for a second variable
        found = None
        for variable in self.mathtree.VarList():
            if variable in graph.axes.keys():
                if found is None:
                    found = variable
                else:
                    raise ValueError("multiple variables found")
        if found is None:
            raise ValueError("no variable found")
        self.variable = found
        self.xaxis = graph.axes[self.variable]
        self.columns = {self.variable: 1, self.result: 2}
        unhandledcolumns = self.style.setdata(graph, self.columns, self)
        unhandledcolumnkeys = unhandledcolumns.keys()
        if len(unhandledcolumnkeys):
            raise ValueError("style couldn't handle column keys %s" % unhandledcolumnkeys)

    def selectstyle(self, graph, selectindex, selecttotal):
        """forwards the style selection to the attached style"""
        self.style.selectstyle(selectindex, selecttotal, self)

    def adjustaxes(self, graph, step):
        """
        - on step == 0 axes with fixed data should be adjusted
        - on step == 1 the current axes ranges might be used to
          calculate further data (e.g. y data for a function y=f(x)
          where the y range depends on the x range)
        - on step == 2 axes ranges not previously set should be
          updated by data accumulated by step 1"""
        if step == 0:
            # only the explicitly given limits of the variable are known yet
            self.points = []
            if self.min is not None:
                self.points.append([None, self.min])
            if self.max is not None:
                self.points.append([None, self.max])
            self.style.adjustaxes([1], self)
        elif step == 1:
            xmin, xmax = graph.axes[self.variable].getrange()
            if self.min is not None: xmin = self.min
            if self.max is not None: xmax = self.max
            vmin = self.xaxis.convert(xmin)
            vmax = self.xaxis.convert(xmax)
            self.points = []
            intervals = self.numberofpoints - 1.0
            for i in range(self.numberofpoints):
                if intervals:
                    v = vmin + (vmax-vmin)*i / intervals
                else:
                    # a single point only: prevent the division by zero
                    v = vmin
                x = self.xaxis.invert(v)
                # caution: the virtual coordinate might differ once
                # the axis rescales itself to include further ticks etc.
                self.points.append([v, x, None])
            for point in self.points:
                self.context[self.variable] = point[1]
                try:
                    point[2] = self.mathtree.Calc(**self.context)
                except (ArithmeticError, ValueError):
                    # the result of this point is kept None
                    pass
        elif step == 2:
            self.style.adjustaxes([2], self)

    def draw(self, graph):
        """lets the attached style draw the data points into graph"""
        self.style.drawpoints(graph, self)
class paramfunction:
    """creates data out of a parametric function

    - varname is the name of the parameter, which is varied between min
      and max using the given number of points
    - expression is a string of the form "key, ... = formula, ...", e.g.
      "x, y = cos(k), sin(k)"; the keys on the left side name the
      columns, and the parser must return one math tree per key
    - title is used as the title of the data; when omitted, the
      expression is used
    - context provides additional variables for the evaluation; the
      dictionary itself is not modified
    - the parameter value is stored in column 0 of each data point"""

    defaultstyle = style.line()

    def __init__(self, varname, min, max, expression, title=notitle, points=100, parser=mathtree.parser(), context={}):
        if title is notitle:
            self.title = expression
        else:
            self.title = title
        self.varname = varname
        self.min = min
        self.max = max
        self.numberofpoints = points
        self.expression = {}
        parts = expression.split("=")
        if len(parts) != 2:
            raise ValueError("expression %r is not of the form 'keys = formulas'" % expression)
        varlist, expressionlist = parts
        keys = varlist.split(",")
        mathtrees = parser.parse(expressionlist)
        if len(keys) != len(mathtrees):
            raise ValueError("unpack tuple of wrong size")
        self.points = [None]*self.numberofpoints
        emptyresult = [None]*len(keys)
        self.columns = {}
        i = 1
        for key in keys:
            self.columns[key.strip()] = i
            i += 1
        # work on a copy: neither the dictionary of the caller nor the
        # shared default dictionary must get the parameter value added
        context = context.copy()
        intervals = self.numberofpoints - 1.0
        for i in range(self.numberofpoints):
            if intervals:
                param = self.min + (self.max-self.min)*i / intervals
            else:
                # a single point only: prevent the division by zero
                param = self.min
            context[self.varname] = param
            self.points[i] = [param] + emptyresult
            for column in self.columns.values():
                self.points[i][column] = mathtrees[column-1].Calc(**context)

    def setstyle(self, graph, style):
        """attaches style and hands the columns over to it; raises
        ValueError when the style could not handle all column keys"""
        self.style = style
        unhandledcolumns = self.style.setdata(graph, self.columns, self)
        unhandledcolumnkeys = unhandledcolumns.keys()
        if len(unhandledcolumnkeys):
            raise ValueError("style couldn't handle column keys %s" % unhandledcolumnkeys)

    def selectstyle(self, graph, selectindex, selecttotal):
        """forwards the style selection to the attached style"""
        self.style.selectstyle(selectindex, selecttotal, self)

    def adjustaxes(self, graph, step):
        """adjusts the axes; all data is fixed, hence only step 0 is used"""
        if step == 0:
            self.style.adjustaxes(self.columns.values(), self)

    def draw(self, graph):
        """lets the attached style draw the data points into graph"""
        self.style.drawpoints(graph, self)