4 # Copyright (C) 2002 Jörg Lehmann <joergl@users.sourceforge.net>
5 # Copyright (C) 2002 André Wobst <wobsta@users.sourceforge.net>
7 # This file is part of PyX (http://pyx.sourceforge.net/).
9 # PyX is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # PyX is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with PyX; if not, write to the Free Software
21 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 import re
, ConfigParser
25 import helper
, mathtree
class ColumnError(Exception):
    """raised when a column parameter does not identify a valid column"""
# column id pattern: a "$" followed by an optionally negative integer, which
# may be wrapped in parentheses ("$1", "$-1", "$(1)", "$(-1)"); group 1 holds
# the text after the "$", including the parentheses when present
ColPattern = re.compile(r"\$(\(-?[0-9]+\)|-?[0-9]+)")
34 class MathTreeValCol(mathtree
.MathTreeValVar
):
35 """column id pattern like "$1" or "$(1)"
36 defines a new value pattern to identify columns by its number"""
38 # __implements__ = ... # TODO: mathtree interfaces
40 def InitByParser(self
, arg
):
41 Match
= arg
.MatchPattern(ColPattern
)
43 # just store the matched string -> handle this variable name later on
# extend the list of possible values by MathTreeValCol; the result is stored
# as a tuple made of mathtree.DefaultMathTreeVals followed by MathTreeValCol
MathTreeValsWithCol = tuple(list(mathtree.DefaultMathTreeVals) + [MathTreeValCol])
53 """interface definition of a data object
54 data objects store data arranged in rows and columns"""
58 - a list of strings storing the column titles
59 - the length of the list must match the number of columns
60 - any titles entry might be None, thus explicitly not providing a column title"""
64 - a list of rows where each row represents a data point
65 - each row contains a list, where each entry of the list represents a value for a column
66 - the number of columns for each data point must match the number of columns
67 - any column entry of any data point might be a float, a string, or None"""
def getcolumnno(self, column):
    """returns a column number
    - the column parameter might be an integer to be used as a column number
    - a column number must be a valid list index (negative values are allowed)
    - the column parameter might be a string contained in the titles list;
      to be valid, the string must be unique within the titles list
    - the method raises ColumnError when the value of the column parameter is invalid"""
77 def getcolumn(self
, column
):
79 - extracts a column out of self.data and returns it as a list
80 - the column is identified by the parameter column as in getcolumnno"""
def addcolumn(self, expression, context={}):
    """adds a column defined by a mathematical expression
    - evaluates the expression for each data row and adds a new column at
      the end of each data row
    - the expression must be a valid mathtree expression (see module mathtree)
      with an extended variable name syntax: strings like "$i" and "$(i)" are
      allowed where i is an integer
    - a variable of the mathematical expression might either be a column title
      or, by the extended variable name syntax, it defines an integer to be used
      as a list index within the column list for each row
    - context is a dictionary, where external variables and functions can be
      given; those are used in the evaluation of the expression
    - when the expression contains the character "=", everything after the last
      "=" is interpreted as the mathematical expression while everything before
      this character will be used as a column title for the new column; when no
      "=" is contained in the expression, the whole expression is taken as the
      mathematical expression and the column title is set to None"""
103 """an (minimal) implementor of _Idata
104 other classes providing _Idata might be based on this class"""
106 __implements__
= _Idata
108 def __init__(self
, data
, titles
, parser
=mathtree
.parser(MathTreeVals
=MathTreeValsWithCol
)):
109 """initializes an instance
110 - data and titles are just set as instance variables without further checks ---
111 they must be valid in terms of _Idata (especially their sizes must fit)
112 - parser is used in addcolumn and thus must implement the expression parsing as
118 def getcolumnno(self
, column
):
119 if helper
.isstring(column
) and self
.titles
.count(column
) == 1:
120 return self
.titles
.index(column
)
123 except (TypeError, IndexError, ValueError):
def getcolumn(self, column):
    """returns a column as a list
    - the column parameter is resolved to a list index by self.getcolumnno
    - the result contains the entry at that index of every row in self.data,
      in the order of the rows"""
    columnno = self.getcolumnno(column)
    return [row[columnno] for row in self.data]
131 def addcolumn(self
, expression
, context
={}):
133 split
= expression
.rindex("=")
135 self
.titles
.append(None)
137 self
.titles
.append(expression
[:split
])
138 expression
= expression
[split
+1:]
139 tree
= self
.parser
.parse(expression
)
141 for key
in tree
.VarList():
144 column
= int(key
[2:-1])
146 column
= int(key
[1:])
151 columnlist
[key
] = column
154 columnlist
[key
] = self
.getcolumnno(key
)
155 except ColumnError
, e
:
156 if key
not in context
.keys():
159 varlist
= context
.copy() # do not modify context
160 for data
in self
.data
:
162 for key
in columnlist
.keys():
163 varlist
[key
] = float(data
[columnlist
[key
]])
164 except (TypeError, ValueError):
167 data
.append(tree
.Calc(**varlist
))
172 "an implementation of _Idata with an easy to use constructor"
174 __implements__
= _Idata
176 def __init__(self
, data
=[], titles
=[], maxcolumns
=helper
.nodefault
, **kwargs
):
177 """initializes an instance
178 - data titles must be valid in terms of _Idata except for the number of
179 columns for each row, especially titles might be the default, e.g. []
180 - instead of lists for data, each row in data, and titles, tuples or
181 any other data structure with sequence like behavior might be used,
182 but they are converted to lists
183 - maxcolumns is an integer; when not set, maxcolumns is evaluated out of
184 the maximum column number in each row of data (not taking into account
186 - titles and each row in data are extended (or cut) to fit maxcolumns;
187 when extending those lists, None entries are appended
188 - parser is used in addcolumn and thus must implement the expression parsing as
190 - further keyword arguments are passed to the constructor of _data"""
192 if maxcolumns
is helper
.nodefault
:
193 maxcolumns
= len(data
[0])
194 for line
in data
[1:]:
195 if len(line
) > maxcolumns
:
196 maxcolumns
= len(line
)
197 titles
= list(titles
[:maxcolumns
])
198 titles
+= [None] * (maxcolumns
- len(titles
))
200 for i
in range(len(data
)):
201 data
[i
] = list(data
[i
]) + [None] * (maxcolumns
- len(data
[i
]))
204 _data
.__init
__(self
, data
, titles
, **kwargs
)
207 class datafile(data
):
209 "an implementation of _Idata reading data from a file"
211 __implements__
= _Idata
213 defaultcommentpattern
= re
.compile(r
"(#+|!+|%+)\s*")
214 defaultstringpattern
= re
.compile(r
"\"(.*?
)\"(\s
+|$
)")
215 defaultcolumnpattern = re.compile(r"(.*?
)(\s
+|$
)")
217 def splitline(self, line, stringpattern, columnpattern, tofloat=1):
218 """returns a tuple created out of the string line
219 - matches stringpattern and columnpattern, adds the first group of that
220 match to the result and removes those matches until the line is empty
221 - when stringpattern matched, the result is always kept as a string
222 - when columnpattern matched and tofloat is true, a conversion to a float
223 is tried; when this conversion fails, the string is kept"""
225 # try to gain speed by skip matching regular expressions
226 if line.find('"')!=-1 or \
227 stringpattern is not self.defaultstringpattern or \
228 columnpattern is not self.defaultcolumnpattern:
230 match = stringpattern.match(line)
232 result.append(match.groups()[0])
233 line = line[match.end():]
235 match = columnpattern.match(line)
238 result.append(float(match.groups()[0]))
239 except (TypeError, ValueError):
240 result.append(match.groups()[0])
242 result.append(match.groups()[0])
243 line = line[match.end():]
247 return map(float, line.split())
248 except (TypeError, ValueError):
250 for r in line.split():
252 result.append(float(r))
253 except (TypeError, ValueError):
260 def __init__(self, file, commentpattern=defaultcommentpattern,
261 stringpattern=defaultstringpattern,
262 columnpattern=defaultcolumnpattern,
263 skiphead=0, skiptail=0, every=1, **kwargs):
264 """read data from a file
265 - file might either be a string or a file instance (something, that
266 provides readlines())
267 - each non-empty line, which does not match the commentpattern, is
268 considered to be a data row; columns are extracted by the splitline
269 method using tofloat=1
270 - the last line before a data line matching the commentpattern and
271 containing further characters is considered as the title line;
272 the title list is extracted by the splitline method using tofloat=0
273 - the first skiphead data lines are skipped
274 - the last skiptail data lines are skipped
275 - only every "every" data line is used (starting at the skiphead + 1 line)
276 - the number of columns is equalized between data and titles like
277 in the data constructor without setting maxcolumns
278 - further keyword arguments are passed to the constructor of data,
279 keyword arguments data, titles, and maxcolumns excluded"""
280 if helper.isstring(file):
281 file = open(file, "r")
286 for line in file.readlines():
288 match = commentpattern.match(line)
291 newtitles = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
293 usetitles = newtitles
296 for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
297 linedata.append(value)
299 if linenumber >= skiphead and not ((linenumber - skiphead) % every):
300 linedata = [linenumber + 1] + linedata
301 if len(linedata) > maxcolumns:
302 maxcolumns = len(linedata)
303 usedata.append(linedata)
306 del usedata[-skiptail:]
307 data.__init__(self, data=usedata, titles=[None] + usetitles, maxcolumns=maxcolumns, **kwargs)
311 class sectionfile(_data):
313 def __init__(self, file, sectionstr = "section", **kwargs):
314 """read data from a config-like file
315 - file might either be a string or a file instance (something, that
316 is valid in config.readfp())
317 - each row is defined by a section in the config-like file (see
318 config module description)
319 - the columns for each row are defined by lines in the section file;
320 the title entries are used to identify the columns
321 - further keyword arguments are passed to the constructor of _data,
322 keyword arguments data and titles excluded"""
323 config = ConfigParser.ConfigParser()
324 config.optionxform = str
325 if helper.isstring(file):
326 config.readfp(open(file, "r"))
330 usetitles = [sectionstr]
331 sections = config.sections()
333 for section in sections:
334 usedata.append([section] + [None for x in range(len(usetitles) - 1)])
335 for option in config.options(section):
336 if option == sectionstr:
337 raise ValueError("'%s' is already used as the section identifier" % sectionstr)
339 index = usetitles.index(option)
341 index = len(usetitles)
342 usetitles.append(option)
345 value = config.get(section, option)
347 usedata[-1][index] = float(value)
348 except (TypeError, ValueError):
349 usedata[-1][index] = value
350 _data.__init__(self, usedata, usetitles, **kwargs)