2 # -*- coding: ISO-8859-1 -*-
5 # Copyright (C) 2002 Jörg Lehmann <joergl@users.sourceforge.net>
6 # Copyright (C) 2002 André Wobst <wobsta@users.sourceforge.net>
8 # This file is part of PyX (http://pyx.sourceforge.net/).
10 # PyX is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation; either version 2 of the License, or
13 # (at your option) any later version.
15 # PyX is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
20 # You should have received a copy of the GNU General Public License
21 # along with PyX; if not, write to the Free Software
22 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 import re
, ConfigParser
26 import helper
, mathtree
class ColumnError(Exception):
    """raised when a column parameter (a number or a title) is invalid"""
# pattern for the extended variable name syntax used to address columns:
# a "$" followed by an (optionally negative) integer, which might be
# enclosed in parentheses, e.g. "$1", "$-1", "$(1)" or "$(-1)"
ColPattern = re.compile(r"\$(\(-?[0-9]+\)|-?[0-9]+)")
35 class MathTreeValCol(mathtree
.MathTreeValVar
):
36 """column id pattern like "$1" or "$(1)"
37 defines a new value pattern to identify columns by their number"""
39 # __implements__ = ... # TODO: mathtree interfaces
41 def InitByParser(self
, arg
):
42 Match
= arg
.MatchPattern(ColPattern
)
44 # just store the matched string -> handle this variable name later on
# extend the list of possible values by MathTreeValCol
MathTreeValsWithCol = tuple(list(mathtree.DefaultMathTreeVals) + [MathTreeValCol])
54 """interface definition of a data object
55 data objects store data arranged in rows and columns"""
59 - a list of strings storing the column titles
60 - the length of the list must match the number of columns
61 - any titles entry might be None, thus explicitly not providing a column title"""
65 - a list of rows where each row represents a data point
66 - each row contains a list, where each entry of the list represents a value for a column
67 - the number of columns for each data point must match the number of columns
68 - any column entry of any data point might be a float, a string, or None"""
70 def getcolumnno(self
, column
):
71 """returns a column number
72 - the column parameter might be an integer to be used as a column number
73 - a column number must be a valid list index (negative values are allowed)
74 - the column parameter might be a string contained in the titles list;
75 to be valid, the string must be unique within the titles list
76 - the method raises ColumnError when the value of the column parameter is invalid"""
78 def getcolumn(self
, column
):
80 - extracts a column out of self.data and returns it as a list
81 - the column is identified by the parameter column as in getcolumnno"""
83 def addcolumn(self
, expression
, context
={}):
84 """adds a column defined by a mathematical expression
85 - evaluates the expression for each data row and adds a new column at
86 the end of each data row
87 - the expression must be a valid mathtree expression (see module mathtree)
88 with an extended variable name syntax: strings like "$i" and "$(i)" are
89 allowed where i is an integer
90 - a variable of the mathematical expression might either be a column title
91 or, by the extended variable name syntax, it defines an integer to be used
92 as a list index within the column list for each row
93 - context is a dictionary, where external variables and functions can be
94 given; those are used in the evaluation of the expression
95 - when the expression contains the character "=", everything after the last
96 "=" is interpreted as the mathematical expression while everything before
97 this character will be used as a column title for the new column; when no
98 "=" is contained in the expression, the whole expression is taken as the
99 mathematical expression and the column title is set to None"""
104 """a (minimal) implementor of _Idata
105 other classes providing _Idata might be based on this class"""
107 __implements__
= _Idata
109 def __init__(self
, data
, titles
, parser
=mathtree
.parser(MathTreeVals
=MathTreeValsWithCol
)):
110 """initializes an instance
111 - data and titles are just set as instance variables without further checks ---
112 they must be valid in terms of _Idata (especially their sizes must fit)
113 - parser is used in addcolumn and thus must implement the expression parsing as
119 def getcolumnno(self
, column
):
120 if helper
.isstring(column
) and self
.titles
.count(column
) == 1:
121 return self
.titles
.index(column
)
124 except (TypeError, IndexError, ValueError):
128 def getcolumn(self
, column
):
129 columnno
= self
.getcolumnno(column
)
130 return [x
[columnno
] for x
in self
.data
]
132 def addcolumn(self
, expression
, context
={}):
134 split
= expression
.rindex("=")
136 self
.titles
.append(None)
138 self
.titles
.append(expression
[:split
])
139 expression
= expression
[split
+1:]
140 tree
= self
.parser
.parse(expression
)
142 for key
in tree
.VarList():
145 column
= int(key
[2:-1])
147 column
= int(key
[1:])
152 columnlist
[key
] = column
155 columnlist
[key
] = self
.getcolumnno(key
)
156 except ColumnError
, e
:
157 if key
not in context
.keys():
160 varlist
= context
.copy() # do not modify context
161 for data
in self
.data
:
163 for key
in columnlist
.keys():
164 varlist
[key
] = float(data
[columnlist
[key
]])
165 except (TypeError, ValueError):
168 data
.append(tree
.Calc(**varlist
))
173 "an implementation of _Idata with an easy to use constructor"
175 __implements__
= _Idata
177 def __init__(self
, data
=[], titles
=[], maxcolumns
=helper
.nodefault
, **kwargs
):
178 """initializes an instance
179 - data and titles must be valid in terms of _Idata except for the number of
180 columns for each row, especially titles might be the default, e.g. []
181 - instead of lists for data, each row in data, and titles, tuples or
182 any other data structure with sequence like behavior might be used,
183 but they are converted to lists
184 - maxcolumns is an integer; when not set, maxcolumns is evaluated out of
185 the maximum column number in each row of data (not taking into account
187 - titles and each row in data is extended (or cut) to fit maxcolumns;
188 when extending those lists, None entries are appended
189 - parser is used in addcolumn and thus must implement the expression parsing as
191 - further keyword arguments are passed to the constructor of _data"""
193 if maxcolumns
is helper
.nodefault
:
194 maxcolumns
= len(data
[0])
195 for line
in data
[1:]:
196 if len(line
) > maxcolumns
:
197 maxcolumns
= len(line
)
198 titles
= list(titles
[:maxcolumns
])
199 titles
+= [None] * (maxcolumns
- len(titles
))
201 for i
in range(len(data
)):
202 data
[i
] = list(data
[i
]) + [None] * (maxcolumns
- len(data
[i
]))
205 _data
.__init
__(self
, data
, titles
, **kwargs
)
208 class datafile(data
):
210 "an implementation of _Idata reading data from a file"
212 __implements__
= _Idata
214 defaultcommentpattern
= re
.compile(r
"(#+|!+|%+)\s*")
215 defaultstringpattern
= re
.compile(r
"\"(.*?
)\"(\s
+|$
)")
216 defaultcolumnpattern = re.compile(r"(.*?
)(\s
+|$
)")
218 def splitline(self, line, stringpattern, columnpattern, tofloat=1):
219 """returns a tuple created out of the string line
220 - matches stringpattern and columnpattern, adds the first group of that
221 match to the result and and removes those matches until the line is empty
222 - when stringpattern matched, the result is always kept as a string
223 - when columnpattern matched and tofloat is true, a conversion to a float
224 is tried; when this conversion fails, the string is kept"""
226 # try to gain speed by skip matching regular expressions
227 if line.find('"')!=-1 or \
228 stringpattern is not self.defaultstringpattern or \
229 columnpattern is not self.defaultcolumnpattern:
231 match = stringpattern.match(line)
233 result.append(match.groups()[0])
234 line = line[match.end():]
236 match = columnpattern.match(line)
239 result.append(float(match.groups()[0]))
240 except (TypeError, ValueError):
241 result.append(match.groups()[0])
243 result.append(match.groups()[0])
244 line = line[match.end():]
248 return map(float, line.split())
249 except (TypeError, ValueError):
251 for r in line.split():
253 result.append(float(r))
254 except (TypeError, ValueError):
261 def __init__(self, file, commentpattern=defaultcommentpattern,
262 stringpattern=defaultstringpattern,
263 columnpattern=defaultcolumnpattern,
264 skiphead=0, skiptail=0, every=1, **kwargs):
265 """read data from a file
266 - file might either be a string or a file instance (something, that
267 provides readlines())
268 - each non-empty line, which does not match the commentpattern, is
269 considered to be a data row; columns are extracted by the splitline
270 method using tofloat=1
271 - the last line before a data line matching the commentpattern and
272 containing further characters is considered as the title line;
273 the title list is extracted by the splitline method using tofloat=0
274 - the first skiphead data lines are skipped
275 - the last skiptail data lines are skipped
276 - only every "every" data line is used (starting at the skiphead + 1 line)
277 - the number of columns is equalized between data and titles like
278 in the data constructor without setting maxcolumns
279 - further keyword arguments are passed to the constructor of data,
280 keyword arguments data, titles, and maxcolumns excluded"""
281 if helper.isstring(file):
282 file = open(file, "r")
287 for line in file.readlines():
289 match = commentpattern.match(line)
292 newtitles = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
294 usetitles = newtitles
297 for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
298 linedata.append(value)
300 if linenumber >= skiphead and not ((linenumber - skiphead) % every):
301 linedata = [linenumber + 1] + linedata
302 if len(linedata) > maxcolumns:
303 maxcolumns = len(linedata)
304 usedata.append(linedata)
307 del usedata[-skiptail:]
308 data.__init__(self, data=usedata, titles=[None] + usetitles, maxcolumns=maxcolumns, **kwargs)
312 class sectionfile(_data):
314 def __init__(self, file, sectionstr = "section", **kwargs):
315 """read data from a config-like file
316 - file might either be a string or a file instance (something, that
317 is valid in config.readfp())
318 - each row is defined by a section in the config-like file (see
319 config module description)
320 - the columns for each row are defined by lines in the section file;
321 the title entries are used to identify the columns
322 - further keyword arguments are passed to the constructor of _data,
323 keyword arguments data and titles excluded"""
324 config = ConfigParser.ConfigParser()
325 config.optionxform = str
326 if helper.isstring(file):
327 config.readfp(open(file, "r"))
331 usetitles = [sectionstr]
332 sections = config.sections()
334 for section in sections:
335 usedata.append([section] + [None for x in range(len(usetitles) - 1)])
336 for option in config.options(section):
337 if option == sectionstr:
338 raise ValueError("'%s' is already used as the section identifier" % sectionstr)
340 index = usetitles.index(option)
342 index = len(usetitles)
343 usetitles.append(option)
346 value = config.get(section, option)
348 usedata[-1][index] = float(value)
349 except (TypeError, ValueError):
350 usedata[-1][index] = value
351 _data.__init__(self, usedata, usetitles, **kwargs)