fix LaTeX breakage
[PyX/mjg.git] / pyx / data.py
blob078c000fdd7d8e6fc0b4edebd1de3e10ae0f14f1
1 #!/usr/bin/env python
2 # -*- coding: ISO-8859-1 -*-
5 # Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
6 # Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
7 # Copyright (C) 2002-2004 André Wobst <wobsta@users.sourceforge.net>
9 # This file is part of PyX (http://pyx.sourceforge.net/).
11 # PyX is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # PyX is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with PyX; if not, write to the Free Software
23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 import re, ConfigParser
27 import helper, mathtree
class ColumnError(Exception):
    """raised when a column specification does not identify a valid column"""
32 # XXX: for new mathtree parser
class MathTreeFuncCol(mathtree.MathTreeValVar):
    """mathtree node referring to a data column by its number

    The column number is the value of the first argument of the node; the
    node is evaluated like a variable named "_col_<number>"."""

    def __init__(self, *args):
        # both attributes are set before the base class is initialized
        self.VarName = None  # set by ColNo
        self.name = "_col_"
        mathtree.MathTreeValVar.__init__(self, *args)

    def VarList(self):
        """returns a list containing this node itself (not a variable name)"""
        return [self]

    # NOTE(review): the first parameter is called HIDDEN_self, presumably to
    # avoid a clash with a keyword argument named "self" -- confirm
    def ColNo(HIDDEN_self, **args):
        """returns the column number as an integer
        - the first argument of the node is calculated using the keyword
          arguments and converted to an integer
        - as a side effect, the variable name "_col_<number>" is stored
          to be used by Calc"""
        colno = int(HIDDEN_self.Args[0].Calc(**args))
        HIDDEN_self.VarName = "_col_%d" % colno
        return colno

    def Calc(HIDDEN_self, **args):
        """returns the value of the variable named by the last ColNo call"""
        variable = mathtree.MathTreeValVar(HIDDEN_self.VarName)
        return variable.Calc(**args)
# the default mathtree functions extended by the column function
MathTreeFuncsWithCol = list(mathtree.DefaultMathTreeFuncs)
MathTreeFuncsWithCol.append(MathTreeFuncCol)
# XXX: end of snip for new mathtree-parser
# XXX: begin of snip for old mathtree-parser
# matches a column reference: a "$" followed by an optionally negative
# integer, which might be enclosed in parentheses ("$1", "$-1", "$(1)")
ColPattern = re.compile(r"\$(\(-?[0-9]+\)|-?[0-9]+)")
class MathTreeValCol(mathtree.MathTreeValVar):
    """value pattern for column ids like "$1" or "$(1)"

    defines a new value pattern, which identifies columns by their number"""

    # __implements__ = ... # TODO: mathtree interfaces

    def InitByParser(self, arg):
        """tries to read a column id from the parser input arg
        - when ColPattern matches, the match is added as an argument of
          this value and 1 is returned
        - otherwise nothing is added and None is returned"""
        matched = arg.MatchPattern(ColPattern)
        if not matched:
            return None
        # only the matched string is stored; the variable name is
        # handled later on
        self.AddArg(matched)
        return 1
# extend the possible values by MathTreeValCol (appended after the defaults)
MathTreeValsWithCol = tuple(mathtree.DefaultMathTreeVals) + (MathTreeValCol,)
# XXX: end of snip for old mathtree-parser
75 class _Idata:
76 """interface definition of a data object
77 data objects store data arranged in rows and columns"""
79 titles = []
80 """column titles
81 - a list of strings storing the column titles
82 - the length of the list must match the number of columns
83 - any titles entry might be None, thus explicitly not providing a column title"""
85 data = []
86 """column/row data
87 - a list of rows where each row represents a data point
88 - each row contains a list, where each entry of the list represents a value for a column
89 - the number of columns for each data point must match the number of columns
90 - any column enty of any data point might be a float, a string, or None"""
92 def getcolumnno(self, column):
93 """returns a column number
94 - the column parameter might be an integer to be used as a column number
95 - a column number must be a valid list index (negative values are allowed)
96 - the column parameter might be a string contained in the titles list;
97 to be valid, the string must be unique within the titles list
98 - the method raises ColumnError when the value of the column parameter is invalid"""
100 def getcolumn(self, column):
101 """returns a column
102 - extracts a column out of self.data and returns it as a list
103 - the column is identified by the parameter column as in getcolumnno"""
105 def addcolumn(self, expression, context={}):
106 """adds a column defined by a mathematical expression
107 - evaluates the expression for each data row and adds a new column at
108 the end of each data row
109 - the expression must be a valid mathtree expression (see module mathtree)
110 with an extended variable name syntax: strings like "$i" and "$(i)" are
111 allowed where i is an integer
112 - a variable of the mathematical expression might either be a column title
113 or, by the extended variable name syntax, it defines an integer to be used
114 as a list index within the column list for each row
115 - context is a dictionary, where external variables and functions can be
116 given; those are used in the evaluation of the expression
117 - when the expression contains the character "=", everything after the last
118 "=" is interpreted as the mathematical expression while everything before
119 this character will be used as a column title for the new column; when no
120 "=" is contained in the expression, the hole expression is taken as the
121 mathematical expression and the column title is set to None"""
class _data:

    """a (minimal) implementor of _Idata
    other classes providing _Idata might be based on this class"""

    __implements__ = _Idata

    def __init__(self, data, titles, parser=None):
        """initializes an instance
        - data and titles are just set as instance variables without further
          checks; they must be valid in terms of _Idata (especially their
          sizes must fit)
        - parser is used in addcolumn and thus must implement the expression
          parsing as defined in _Idata; when it is None, a mathtree parser
          extended by the column syntax is created for the mathtree parser
          variant selected by mathtree.__useparser__"""
        if parser is None:
            # XXX: switch between mathtree-parsers
            if mathtree.__useparser__ == mathtree.__oldparser__:
                # NOTE(review): MathTreeValsWithCol is already built out of
                # the default values, so the defaults are passed twice here;
                # kept as is -- confirm whether this is intended
                parser = mathtree.parser(MathTreeVals=mathtree.DefaultMathTreeVals+MathTreeValsWithCol)
            if mathtree.__useparser__ == mathtree.__newparser__:
                parser = mathtree.parser(MathTreeFuncs=MathTreeFuncsWithCol)
        self.data = data
        self.titles = titles
        self.parser = parser

    def getcolumnno(self, column):
        """returns a column number (see _Idata.getcolumnno)
        - a string occurring exactly once in self.titles yields its index
        - any other value must be a valid index into self.titles and is
          returned unchanged
        - ColumnError is raised otherwise"""
        if helper.isstring(column) and self.titles.count(column) == 1:
            return self.titles.index(column)
        return self._checkcolumnno(column)

    def _checkcolumnno(self, column):
        """returns column when it is a valid index into self.titles and
        raises ColumnError otherwise"""
        try:
            self.titles[column]
        except (TypeError, IndexError, ValueError):
            raise ColumnError
        return column

    def getcolumn(self, column):
        """returns the column identified by column (see getcolumnno) as a
        new list containing the entry of each row of self.data"""
        columnno = self.getcolumnno(column)
        return [x[columnno] for x in self.data]

    def addcolumn(self, expression, context={}):
        """adds a column defined by a mathematical expression (see
        _Idata.addcolumn)
        - context is not modified (a copy is used), thus sharing the default
          dictionary between calls is harmless
        - a variable, which is neither a column reference nor a valid column
          title, must be a key of context; otherwise ColumnError is raised
        - ColumnError is raised as well when a column reference by number is
          not a valid column number
        - a row gets None as its new value when one of the referenced
          columns of that row can not be converted to a float
        - self.titles and the rows are modified only after the expression
          was parsed, all referenced columns were validated, and all new
          values were calculated; when an exception is raised, the instance
          is left unchanged"""
        try:
            split = expression.rindex("=")
        except ValueError:
            title = None
        else:
            title = expression[:split]
            expression = expression[split+1:]
        tree = self.parser.parse(expression)
        columnlist = {} # maps variable names to column numbers
        varlist = context.copy() # do not modify context
        # the new title is not part of self.titles yet, hence the column
        # numbers are validated against the existing columns only
        if mathtree.__useparser__ == mathtree.__newparser__: # XXX: switch between mathtree-parsers
            for key in tree.VarList():
                if isinstance(key, MathTreeFuncCol):
                    column = self._checkcolumnno(int(key.ColNo(**varlist)))
                    columnlist["_col_%d" % column] = column
                elif key[0:5] == "_col_":
                    columnlist[key] = self._checkcolumnno(int(key[5:]))
                else:
                    self._addtitlevar(columnlist, key, context)
        else:
            for key in tree.VarList():
                if key[0] == "$":
                    # "$(i)" or "$i"
                    if key[1] == "(":
                        column = int(key[2:-1])
                    else:
                        column = int(key[1:])
                    columnlist[key] = self._checkcolumnno(column)
                else:
                    self._addtitlevar(columnlist, key, context)

        # calculate all values before touching self.titles and the rows
        values = []
        for row in self.data:
            try:
                for key, column in columnlist.items():
                    varlist[key] = float(row[column])
            except (TypeError, ValueError):
                values.append(None)
            else:
                values.append(tree.Calc(**varlist))
        self.titles.append(title)
        for row, value in zip(self.data, values):
            row.append(value)

    def _addtitlevar(self, columnlist, key, context):
        """stores the column number of the variable key in columnlist; a
        key, which is not a valid column, is skipped when it is contained
        in context and leads to a ColumnError otherwise"""
        try:
            columnlist[key] = self.getcolumnno(key)
        except ColumnError:
            if key not in context:
                raise
class data(_data):

    "an implementation of _Idata with an easy to use constructor"

    __implements__ = _Idata

    def __init__(self, data=[], titles=[], maxcolumns=helper.nodefault, **kwargs):
        """initializes an instance
        - data and titles must be valid in terms of _Idata except for the
          number of columns for each row; especially titles might be the
          default, e.g. []
        - instead of lists for data, each row in data, and titles, tuples or
          any other data structure with sequence like behavior might be used,
          but they are converted to lists (the arguments are not modified)
        - maxcolumns is an integer; when not set, maxcolumns is the maximum
          number of columns in the rows of data (not taking into account
          the titles list)
        - titles and each row in data are extended or cut to fit maxcolumns;
          when extending those lists, None entries are appended
        - when data is empty, titles is replaced by an empty list and
          maxcolumns is not used
        - parser is used in addcolumn and thus must implement the expression
          parsing as defined in _Idata
        - further keyword arguments are passed to the constructor of _data"""
        if len(data):
            if maxcolumns is helper.nodefault:
                maxcolumns = max([len(row) for row in data])
            titles = list(titles[:maxcolumns])
            titles += [None] * (maxcolumns - len(titles))
            fitted = []
            for row in data:
                # cut rows which are too long, as it is done for the titles
                row = list(row)[:maxcolumns]
                fitted.append(row + [None] * (maxcolumns - len(row)))
            data = fitted
        else:
            titles = []
        _data.__init__(self, data, titles, **kwargs)
class datafile(data):

    "an implementation of _Idata reading data from a file"

    __implements__ = _Idata

    defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
    defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
    defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")

    def _tofloat(self, value):
        """returns value converted to a float; value itself is returned
        when this conversion fails"""
        try:
            return float(value)
        except (TypeError, ValueError):
            return value

    def splitline(self, line, stringpattern, columnpattern, tofloat=1):
        """returns a list created out of the string line
        - matches stringpattern and columnpattern, adds the first group of
          that match to the result and removes those matches until the line
          is empty
        - when stringpattern matched, the result is always kept as a string
        - when columnpattern matched and tofloat is true, a conversion to a
          float is tried; when this conversion fails, the string is kept
        - when the line contains no double quote and the default patterns are
          used, the line is just split at whitespace instead
        - ValueError is raised when neither pattern matches or when a match
          is empty (the line could never become empty in that case)"""
        # try to gain speed by skipping the regular expression matching
        if (line.find('"') == -1 and
            stringpattern is self.defaultstringpattern and
            columnpattern is self.defaultcolumnpattern):
            columns = line.split()
            if not tofloat:
                return columns
            try:
                # list() is needed as map might be evaluated lazily
                return list(map(float, columns))
            except (TypeError, ValueError):
                # not all entries are numbers: convert them one by one
                return [self._tofloat(column) for column in columns]

        result = []
        while len(line):
            match = stringpattern.match(line)
            if match:
                value = match.groups()[0]
            else:
                match = columnpattern.match(line)
                if match is None:
                    raise ValueError("column pattern does not match '%s'" % line)
                value = match.groups()[0]
                if tofloat:
                    value = self._tofloat(value)
            if not match.end():
                # an empty match would never shorten the line
                raise ValueError("empty pattern match at '%s'" % line)
            result.append(value)
            line = line[match.end():]
        return result

    def __init__(self, file, commentpattern=defaultcommentpattern,
                             stringpattern=defaultstringpattern,
                             columnpattern=defaultcolumnpattern,
                             skiphead=0, skiptail=0, every=1, **kwargs):
        """read data from a file
        - file might either be a string or a file instance (something that
          provides readlines()); a string is used as a file name and the
          file opened here is closed again after reading; a file instance
          is not closed
        - each non-empty line, which does not match the commentpattern, is
          considered to be a data row; columns are extracted by the splitline
          method using tofloat=1
        - the last line before a data line matching the commentpattern and
          containing further characters is considered as the title line;
          the title list is extracted by the splitline method using tofloat=0
        - the first skiphead data lines are skipped
        - the last skiptail data lines are skipped
        - only every "every" data line is used (starting at the skiphead + 1 line)
        - the number of the data line (starting at 1) is inserted as the
          first column of each row; this column has no title
        - the number of columns is equalized between data and titles like
          in the data constructor without setting maxcolumns
        - further keyword arguments are passed to the constructor of data,
          keyword arguments data, titles, and maxcolumns excluded"""
        if helper.isstring(file):
            file = open(file, "r")
            closefile = 1
        else:
            closefile = 0
        try:
            lines = file.readlines()
        finally:
            # only close what was opened here
            if closefile:
                file.close()
        usetitles = []
        usedata = []
        linenumber = 0 # counts the data lines only
        maxcolumns = 0
        for line in lines:
            line = line.strip()
            match = commentpattern.match(line)
            if match:
                # comment lines are title candidates until data was stored
                if not len(usedata):
                    newtitles = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
                    if len(newtitles):
                        usetitles = newtitles
            else:
                linedata = list(self.splitline(line, stringpattern, columnpattern, tofloat=1))
                if len(linedata):
                    if linenumber >= skiphead and not ((linenumber - skiphead) % every):
                        linedata = [linenumber + 1] + linedata
                        if len(linedata) > maxcolumns:
                            maxcolumns = len(linedata)
                        usedata.append(linedata)
                    linenumber += 1
        if skiptail:
            del usedata[-skiptail:]
        data.__init__(self, data=usedata, titles=[None] + usetitles, maxcolumns=maxcolumns, **kwargs)
class sectionfile(_data):

    "an implementation of _Idata reading data from a config-like file"

    def __init__(self, file, sectionstr = "section", **kwargs):
        """read data from a config-like file
        - file might either be a string or a file instance (something, that
          is valid in config.readfp()); a string is used as a file name and
          the file opened here is closed again after reading; a file
          instance is not closed
        - each row is defined by a section in the config-like file (see
          config module description); the rows are sorted by section name
        - the columns for each row are defined by lines in the section file;
          the title entries are used to identify the columns
        - the first column contains the section name; its title is sectionstr
        - a value is stored as a float when it can be converted to a float
          and as a string otherwise; options missing in a section are None
        - ValueError is raised when an option is named like sectionstr
        - further keyword arguments are passed to the constructor of _data,
          keyword arguments data and titles excluded"""
        config = ConfigParser.ConfigParser()
        config.optionxform = str # keep the case of the option names
        if helper.isstring(file):
            configfile = open(file, "r")
            try:
                config.readfp(configfile)
            finally:
                configfile.close()
        else:
            config.readfp(file)
        usedata = []
        usetitles = [sectionstr]
        sections = config.sections()
        sections.sort()
        for section in sections:
            # a new row: the section name followed by None for each known option
            usedata.append([section] + [None] * (len(usetitles) - 1))
            for option in config.options(section):
                if option == sectionstr:
                    raise ValueError("'%s' is already used as the section identifier" % sectionstr)
                try:
                    index = usetitles.index(option)
                except ValueError:
                    # a new option: add a column to the titles and all rows
                    index = len(usetitles)
                    usetitles.append(option)
                    for line in usedata:
                        line.append(None)
                value = config.get(section, option)
                try:
                    usedata[-1][index] = float(value)
                except (TypeError, ValueError):
                    usedata[-1][index] = value
        _data.__init__(self, usedata, usetitles, **kwargs)