make all parts of the manual compile again; parts of the manual are still out of...
[PyX/mjg.git] / pyx / data.py
blob34015ce4c5ffc75d4538db24907f827080efc64c
1 #!/usr/bin/env python
2 # -*- coding: ISO-8859-1 -*-
5 # Copyright (C) 2002 Jörg Lehmann <joergl@users.sourceforge.net>
6 # Copyright (C) 2002 André Wobst <wobsta@users.sourceforge.net>
8 # This file is part of PyX (http://pyx.sourceforge.net/).
10 # PyX is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation; either version 2 of the License, or
13 # (at your option) any later version.
15 # PyX is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
20 # You should have received a copy of the GNU General Public License
21 # along with PyX; if not, write to the Free Software
22 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 import re, ConfigParser
26 import helper, mathtree
class ColumnError(Exception):
    """raised when a column cannot be identified by the given number or title"""
# pattern of a column reference inside an expression: a "$" followed by an
# (optionally negative) integer, which may be enclosed in parentheses;
# group 1 holds the integer including the parentheses, if present
ColPattern = re.compile(r"\$(\(-?[0-9]+\)|-?[0-9]+)")
class MathTreeValCol(mathtree.MathTreeValVar):
    """value pattern for column ids like "$1" or "$(1)"
    - extends the variable value of mathtree so that a column can be
      identified by its number"""

    # __implements__ = ...    # TODO: mathtree interfaces

    def InitByParser(self, arg):
        """tries to match a column id at the current parser position
        - arg must provide MatchPattern(); its result for ColPattern is
          stored via AddArg() without further interpretation (the variable
          name is handled later on)
        - returns 1 when a column id was matched and None otherwise"""
        matched = arg.MatchPattern(ColPattern)
        if not matched:
            return None
        self.AddArg(matched)
        return 1
# extend the default mathtree value types by MathTreeValCol, so that
# expressions might refer to columns by "$i" and "$(i)"
MathTreeValsWithCol = tuple(mathtree.DefaultMathTreeVals) + (MathTreeValCol,)
53 class _Idata:
54 """interface definition of a data object
55 data objects store data arranged in rows and columns"""
57 titles = []
58 """column titles
59 - a list of strings storing the column titles
60 - the length of the list must match the number of columns
61 - any titles entry might be None, thus explicitly not providing a column title"""
63 data = []
64 """column/row data
65 - a list of rows where each row represents a data point
66 - each row contains a list, where each entry of the list represents a value for a column
67 - the number of columns for each data point must match the number of columns
68 - any column enty of any data point might be a float, a string, or None"""
70 def getcolumnno(self, column):
71 """returns a column number
72 - the column parameter might be an integer to be used as a column number
73 - a column number must be a valid list index (negative values are allowed)
74 - the column parameter might be a string contained in the titles list;
75 to be valid, the string must be unique within the titles list
76 - the method raises ColumnError when the value of the column parameter is invalid"""
78 def getcolumn(self, column):
79 """returns a column
80 - extracts a column out of self.data and returns it as a list
81 - the column is identified by the parameter column as in getcolumnno"""
83 def addcolumn(self, expression, context={}):
84 """adds a column defined by a mathematical expression
85 - evaluates the expression for each data row and adds a new column at
86 the end of each data row
87 - the expression must be a valid mathtree expression (see module mathtree)
88 with an extended variable name syntax: strings like "$i" and "$(i)" are
89 allowed where i is an integer
90 - a variable of the mathematical expression might either be a column title
91 or, by the extended variable name syntax, it defines an integer to be used
92 as a list index within the column list for each row
93 - context is a dictionary, where external variables and functions can be
94 given; those are used in the evaluation of the expression
95 - when the expression contains the character "=", everything after the last
96 "=" is interpreted as the mathematical expression while everything before
97 this character will be used as a column title for the new column; when no
98 "=" is contained in the expression, the hole expression is taken as the
99 mathematical expression and the column title is set to None"""
class _data:

    """a (minimal) implementor of _Idata
    other classes providing _Idata might be based on this class"""

    __implements__ = _Idata

    def __init__(self, data, titles, parser=mathtree.parser(MathTreeVals=MathTreeValsWithCol)):
        """initializes an instance
        - data and titles are just set as instance variables without further checks ---
          they must be valid in terms of _Idata (especially their sizes must fit)
        - parser is used in addcolumn and thus must implement the expression parsing as
          defined in _Idata; note that the default parser instance is created once and
          shared by all instances not providing their own parser"""
        self.data = data
        self.titles = titles
        self.parser = parser

    def getcolumnno(self, column):
        """returns a column number (see _Idata.getcolumnno)
        - a string occurring exactly once in self.titles is translated into its index
        - anything else must be usable as an index into self.titles and is
          returned unchanged
        - raises ColumnError otherwise"""
        if helper.isstring(column) and self.titles.count(column) == 1:
            return self.titles.index(column)
        try:
            # only probe the index; the value itself is not of interest
            self.titles[column]
        except (TypeError, IndexError, ValueError):
            raise ColumnError
        return column

    def getcolumn(self, column):
        """returns the column identified by column (see getcolumnno) as a new list
        - raises ColumnError when column is invalid"""
        columnno = self.getcolumnno(column)
        return [x[columnno] for x in self.data]

    def addcolumn(self, expression, context={}):
        """adds a column defined by a mathematical expression (see _Idata.addcolumn)
        - variables are resolved against the columns existing before the call;
          the column being added can not be referenced by the expression
        - a variable, which is neither a column number nor a unique column
          title, must be contained in context; otherwise ColumnError is raised
        - a row gets None as its new entry, when one of the referenced column
          entries can not be converted to a float
        - titles and rows are modified only after the expression was parsed
          and evaluated for all rows; thus the instance is left unchanged
          when an exception is raised
        - context is not modified"""
        # split the optional title from the mathematical expression
        try:
            split = expression.rindex("=")
        except ValueError:
            title = None
        else:
            title = expression[:split]
            expression = expression[split+1:]
        tree = self.parser.parse(expression)

        # map the variables of the expression to column numbers
        columnlist = {}
        for key in tree.VarList():
            if key[0] == "$":
                # extended variable name syntax: "$i" or "$(i)"
                if key[1] == "(":
                    column = int(key[2:-1])
                else:
                    column = int(key[1:])
                try:
                    self.titles[column]
                except IndexError:
                    raise ColumnError
                columnlist[key] = column
            else:
                try:
                    columnlist[key] = self.getcolumnno(key)
                except ColumnError:
                    # not a column title -> must be provided by the context
                    if key not in context:
                        raise

        # evaluate the expression for all rows before touching the instance
        varlist = context.copy() # do not modify context
        values = []
        for row in self.data:
            try:
                for key, column in columnlist.items():
                    varlist[key] = float(row[column])
            except (TypeError, ValueError):
                values.append(None)
            else:
                values.append(tree.Calc(**varlist))

        # commit title and values together to keep the sizes consistent
        self.titles.append(title)
        for row, value in zip(self.data, values):
            row.append(value)
class data(_data):

    "an implementation of _Idata with an easy to use constructor"

    __implements__ = _Idata

    def __init__(self, data=[], titles=[], maxcolumns=helper.nodefault, **kwargs):
        """initializes an instance
        - data and titles must be valid in terms of _Idata except for the number of
          columns for each row, especially titles might be the default, e.g. []
        - instead of lists for data, each row in data, and titles, tuples or
          any other data structure with sequence like behavior might be used,
          but they are converted to lists; the objects passed to the constructor
          are never stored or modified
        - maxcolumns is an integer; when not set, maxcolumns is evaluated out of
          the maximum column number in each row of data (not taking into account
          the titles list)
        - titles and each row in data are extended (or cut) to fit maxcolumns;
          when extending those lists, None entries are appended
        - when data is empty, the titles are dropped as well
        - further keyword arguments (like parser) are passed to the constructor
          of _data"""
        # always work on a copy: neither the caller's sequence nor the shared
        # default argument must become the instance's data
        data = list(data)
        if len(data):
            if maxcolumns is helper.nodefault:
                maxcolumns = max([len(line) for line in data])
            titles = list(titles)[:maxcolumns]
            titles += [None] * (maxcolumns - len(titles))
            for i in range(len(data)):
                # cut rows being too long, pad rows being too short
                line = list(data[i])[:maxcolumns]
                data[i] = line + [None] * (maxcolumns - len(line))
        else:
            titles = []
        _data.__init__(self, data, titles, **kwargs)
class datafile(data):

    "an implementation of _Idata reading data from a file"

    __implements__ = _Idata

    defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
    defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
    defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")

    def splitline(self, line, stringpattern, columnpattern, tofloat=1):
        """returns a list created out of the string line
        - matches stringpattern and columnpattern, adds the first group of that
          match to the result and removes those matches until the line is empty
        - when stringpattern matched, the result is always kept as a string
        - when columnpattern matched and tofloat is true, a conversion to a float
          is tried; when this conversion fails, the string is kept
        - raises ValueError when neither pattern consumes any character at the
          beginning of the remaining line (the line could never become empty)"""
        # try to gain speed by skip matching regular expressions: without a
        # quote and with the default patterns, a plain whitespace split is used
        if line.find('"') == -1 and \
           stringpattern is self.defaultstringpattern and \
           columnpattern is self.defaultcolumnpattern:
            columns = line.split()
            if not tofloat:
                return columns
            try:
                # fast path: all columns are numbers; a list comprehension
                # (instead of map) ensures the conversion takes place here
                return [float(column) for column in columns]
            except (TypeError, ValueError):
                result = []
                for column in columns:
                    try:
                        result.append(float(column))
                    except (TypeError, ValueError):
                        result.append(column)
                return result

        result = []
        while len(line):
            match = stringpattern.match(line)
            if match is not None and match.end():
                result.append(match.groups()[0])
            else:
                match = columnpattern.match(line)
                if match is None or not match.end():
                    # nothing would be removed from the line
                    raise ValueError("cannot split line at '%s'" % line)
                value = match.groups()[0]
                if tofloat:
                    try:
                        value = float(value)
                    except (TypeError, ValueError):
                        pass # keep the string
                result.append(value)
            line = line[match.end():]
        return result

    def __init__(self, file, commentpattern=defaultcommentpattern,
                             stringpattern=defaultstringpattern,
                             columnpattern=defaultcolumnpattern,
                             skiphead=0, skiptail=0, every=1, **kwargs):
        """read data from a file
        - file might either be a string or a file instance (something, that
          provides readlines()); a file opened by this constructor is closed
          again, a file instance passed in is left open
        - each non-empty line, which does not match the commentpattern, is
          considered to be a data row; columns are extracted by the splitline
          method using tofloat=1
        - the last line before the first used data line matching the
          commentpattern and containing further characters is considered as
          the title line; the title list is extracted by the splitline method
          using tofloat=0
        - the first skiphead data lines are skipped
        - the last skiptail data lines are skipped
        - only every "every" data line is used (starting at the skiphead + 1 line)
        - the number of the data line (starting at 1 and counting skipped data
          lines as well) is inserted as the first column of each row; this
          column has no title
        - the number of columns is equalized between data and titles like
          in the data constructor without setting maxcolumns
        - further keyword arguments are passed to the constructor of data,
          keyword arguments data, titles, and maxcolumns excluded"""
        if helper.isstring(file):
            file = open(file, "r")
            closefile = 1
        else:
            closefile = 0
        try:
            lines = file.readlines()
        finally:
            # only close what was opened here
            if closefile:
                file.close()

        usetitles = []
        usedata = []
        linenumber = 0 # number of data lines seen so far
        for line in lines:
            line = line.strip()
            match = commentpattern.match(line)
            if match:
                # comments are title candidates until the first row is used
                if not usedata:
                    newtitles = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
                    if len(newtitles):
                        usetitles = list(newtitles)
            else:
                linedata = list(self.splitline(line, stringpattern, columnpattern, tofloat=1))
                if len(linedata):
                    if linenumber >= skiphead and not ((linenumber - skiphead) % every):
                        usedata.append([linenumber + 1] + linedata)
                    linenumber += 1
        if skiptail:
            # skiptail counts data lines, not the rows selected by "every";
            # the first column of each row holds its data line number
            lastline = linenumber - skiptail
            usedata = [row for row in usedata if row[0] <= lastline]
        maxcolumns = 0
        for row in usedata:
            if len(row) > maxcolumns:
                maxcolumns = len(row)
        data.__init__(self, data=usedata, titles=[None] + usetitles, maxcolumns=maxcolumns, **kwargs)
class sectionfile(_data):

    "an implementation of _Idata reading data from a config-like file"

    __implements__ = _Idata

    def __init__(self, file, sectionstr = "section", **kwargs):
        """read data from a config-like file
        - file might either be a string or a file instance (something, that
          is valid in config.readfp()); a file opened by this constructor is
          closed again, a file instance passed in is left open
        - each row is defined by a section in the config-like file (see
          ConfigParser module description); the rows are sorted by the
          section names
        - the columns for each row are defined by the options of the section;
          the option names are used as column titles to identify the columns
        - the first column contains the section name and is titled by
          sectionstr; ValueError is raised when an option is named like that
        - option values are converted to floats, when possible, and kept as
          strings otherwise; entries of options not defined in a section
          are None
        - further keyword arguments are passed to the constructor of _data,
          keyword arguments data and titles excluded"""
        config = ConfigParser.ConfigParser()
        config.optionxform = str # keep option names case sensitive
        if helper.isstring(file):
            fileobj = open(file, "r")
            try:
                config.readfp(fileobj)
            finally:
                fileobj.close()
        else:
            config.readfp(file)
        usedata = []
        usetitles = [sectionstr]
        sections = config.sections()
        sections.sort()
        for section in sections:
            row = [section] + [None] * (len(usetitles) - 1)
            usedata.append(row)
            for option in config.options(section):
                if option == sectionstr:
                    raise ValueError("'%s' is already used as the section identifier" % sectionstr)
                try:
                    index = usetitles.index(option)
                except ValueError:
                    # a new column: extend the titles and all rows read so
                    # far (including the current one)
                    index = len(usetitles)
                    usetitles.append(option)
                    for line in usedata:
                        line.append(None)
                value = config.get(section, option)
                try:
                    row[index] = float(value)
                except (TypeError, ValueError):
                    row[index] = value
        _data.__init__(self, usedata, usetitles, **kwargs)