pdftex; texter (graph is currently broken); data docstrings --- minor other stuff...
[PyX/mjg.git] / pyx / data.py
blobda3cf699eba5f502e8c88c2a44830bd7b7666c3b
1 #!/usr/bin/env python
4 # Copyright (C) 2002 Jörg Lehmann <joergl@users.sourceforge.net>
5 # Copyright (C) 2002 André Wobst <wobsta@users.sourceforge.net>
7 # This file is part of PyX (http://pyx.sourceforge.net/).
9 # PyX is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # PyX is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with PyX; if not, write to the Free Software
21 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 import re, ConfigParser
25 import helper, mathtree
class ColumnError(Exception):
    """raised when a column can not be identified by the given number or title"""
# column reference inside an expression: a "$" followed by an (optionally
# negative) integer, which might be enclosed in parentheses, e.g. "$1",
# "$-1", "$(1)", or "$(-1)"; group 1 holds everything after the "$"
ColPattern = re.compile(r"\$(\(-?[0-9]+\)|-?[0-9]+)")
class MathTreeValCol(mathtree.MathTreeValVar):
    """value pattern for column references like "$1" or "$(1)"

    adds a value pattern to the expression parser by which a column is
    identified by its number"""

    # __implements__ = ... # TODO: mathtree interfaces

    def InitByParser(self, arg):
        """tries to match a column reference at the parser position
        - returns 1 when ColPattern matched, None otherwise"""
        found = arg.MatchPattern(ColPattern)
        if not found:
            return None
        # the match result is stored as it is; turning this variable name
        # into a column number is done later on (see addcolumn)
        self.AddArg(found)
        return 1
# the default mathtree values extended by the column reference MathTreeValCol
MathTreeValsWithCol = tuple(mathtree.DefaultMathTreeVals) + (MathTreeValCol,)
52 class _Idata:
53 """interface definition of a data object
54 data objects store data arranged in rows and columns"""
56 titles = []
57 """column titles
58 - a list of strings storing the column titles
59 - the length of the list must match the number of columns
60 - any titles entry might be None, thus explicitly not providing a column title"""
62 data = []
63 """column/row data
64 - a list of rows where each row represents a data point
65 - each row contains a list, where each entry of the list represents a value for a column
66 - the number of columns for each data point must match the number of columns
67 - any column enty of any data point might be a float, a string, or None"""
69 def getcolumnno(self, column):
70 """returns a column number
71 - the column parameter might be an integer to be used as a column number
72 - a column number must be a valid list index (negative values are allowed)
73 - the column parameter might be a string contained in the titles list;
74 to be valid, the string must be unique within the titles list
75 - the method raises ColumnError when the value of the column parameter is invalid"""
77 def getcolumn(self, column):
78 """returns a column
79 - extracts a column out of self.data and returns it as a list
80 - the column is identified by the parameter column as in getcolumnno"""
82 def addcolumn(self, expression, context={}):
83 """adds a column defined by a mathematical expression
84 - evaluates the expression for each data row and adds a new column at
85 the end of each data row
86 - the expression must be a valid mathtree expression (see module mathtree)
87 with an extended variable name syntax: strings like "$i" and "$(i)" are
88 allowed where i is an integer
89 - a variable of the mathematical expression might either be a column title
90 or, by the extended variable name syntax, it defines an integer to be used
91 as a list index within the column list for each row
92 - context is a dictionary, where external variables and functions can be
93 given; those are used in the evaluation of the expression
94 - when the expression contains the character "=", everything after the last
95 "=" is interpreted as the mathematical expression while everything before
96 this character will be used as a column title for the new column; when no
97 "=" is contained in the expression, the hole expression is taken as the
98 mathematical expression and the column title is set to None"""
class _data:

    """a (minimal) implementor of _Idata
    other classes providing _Idata might be based on this class"""

    __implements__ = _Idata

    def __init__(self, data, titles, parser=mathtree.parser(MathTreeVals=MathTreeValsWithCol)):
        """initializes an instance
        - data and titles are just set as instance variables without further checks ---
          they must be valid in terms of _Idata (especially their sizes must fit)
        - parser is used in addcolumn and thus must implement the expression parsing as
          defined in _Idata"""
        self.data = data
        self.titles = titles
        self.parser = parser

    def getcolumnno(self, column):
        """returns a column number (see _Idata)
        - a string occurring exactly once in self.titles yields its index
        - anything else is tried as an index into self.titles and is
          returned unchanged when this lookup succeeds
        - ColumnError is raised otherwise"""
        if helper.isstring(column) and self.titles.count(column) == 1:
            return self.titles.index(column)
        try:
            self.titles[column]
        except (TypeError, IndexError, ValueError):
            raise ColumnError("invalid column %r" % (column,))
        return column

    def getcolumn(self, column):
        """returns the column identified by column (as in getcolumnno) as a list"""
        columnno = self.getcolumnno(column)
        return [x[columnno] for x in self.data]

    def addcolumn(self, expression, context={}):
        """adds a column defined by a mathematical expression (see _Idata)
        - variables are resolved against the columns existing before the
          call; a variable which is neither a column reference nor a column
          title must be contained in context, otherwise ColumnError is raised
        - a row gets None as the new value when one of the referenced
          column values can not be converted to a float
        - self.titles and self.data are modified only after the expression
          was parsed, all variables were resolved, and all rows were
          evaluated; when an exception is raised, the instance is unchanged"""
        try:
            split = expression.rindex("=")
        except ValueError:
            title = None
        else:
            title = expression[:split]
            expression = expression[split+1:]
        tree = self.parser.parse(expression)

        # map the variable names of the expression to column numbers; the
        # new title is not yet part of self.titles, hence the new column
        # itself can not be referenced
        columnlist = {}
        for key in tree.VarList():
            if key[0] == "$":
                # "$(i)" or "$i"
                if key[1] == "(":
                    column = int(key[2:-1])
                else:
                    column = int(key[1:])
                try:
                    self.titles[column]
                except (TypeError, IndexError):
                    raise ColumnError("invalid column reference %r" % (key,))
                columnlist[key] = column
            else:
                try:
                    columnlist[key] = self.getcolumnno(key)
                except ColumnError:
                    # not a column: the variable has to come from context
                    if key not in context:
                        raise

        varlist = context.copy() # do not modify context
        values = []
        for row in self.data:
            try:
                for key, column in columnlist.items():
                    varlist[key] = float(row[column])
            except (TypeError, ValueError):
                values.append(None)
            else:
                values.append(tree.Calc(**varlist))

        # everything succeeded -> commit the title and the values
        self.titles.append(title)
        for row, value in zip(self.data, values):
            row.append(value)
class data(_data):

    "an implementation of _Idata with an easy to use constructor"

    __implements__ = _Idata

    def __init__(self, data=[], titles=[], maxcolumns=helper.nodefault, **kwargs):
        """initializes an instance
        - data and titles must be valid in terms of _Idata except for the
          number of columns for each row; especially titles might be the
          default, e.g. []
        - instead of lists for data, each row in data, and titles, tuples or
          any other data structure with sequence like behavior might be used,
          but they are converted to (new) lists
        - maxcolumns is an integer; when not set, maxcolumns is evaluated out of
          the maximum column number in each row of data (not taking into account
          the titles list)
        - titles and each row in data are extended (or cut) to fit maxcolumns;
          when extending those lists, None entries are appended
        - when data is empty, the titles are set to an empty list
        - further keyword arguments (like parser) are passed to the
          constructor of _data"""
        # always build new lists: neither the sequences of the caller nor
        # the (shared) default arguments become part of the instance
        data = [list(row) for row in data]
        if len(data):
            if maxcolumns is helper.nodefault:
                maxcolumns = max([len(row) for row in data])
            titles = list(titles)[:maxcolumns]
            titles += [None] * (maxcolumns - len(titles))
            # a negative difference yields an empty extension list, thus
            # rows which are too long are just cut by the slice
            data = [row[:maxcolumns] + [None] * (maxcolumns - len(row))
                    for row in data]
        else:
            titles = []
        _data.__init__(self, data, titles, **kwargs)
class datafile(data):

    "an implementation of _Idata reading data from a file"

    __implements__ = _Idata

    defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
    defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
    defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")

    def splitline(self, line, stringpattern, columnpattern, tofloat=1):
        """returns a list created out of the string line
        - matches stringpattern and columnpattern, adds the first group of that
          match to the result and removes those matches until the line is empty
        - when stringpattern matched, the result is always kept as a string
        - when columnpattern matched and tofloat is true, a conversion to a float
          is tried; when this conversion fails, the string is kept
        - ValueError is raised when neither pattern matches the remaining
          line or when a match does not consume any character"""
        # try to gain speed by skipping the regular expression matching: a
        # line without any quote split by the default patterns is just
        # split at whitespace
        if (line.find('"') == -1 and
            stringpattern is self.defaultstringpattern and
            columnpattern is self.defaultcolumnpattern):
            fields = line.split()
            if not tofloat:
                return fields
            try:
                # common case: all columns are numbers
                return [float(field) for field in fields]
            except (TypeError, ValueError):
                pass
            result = []
            for field in fields:
                try:
                    result.append(float(field))
                except (TypeError, ValueError):
                    result.append(field)
            return result

        result = []
        while len(line):
            match = stringpattern.match(line)
            if match:
                value = match.groups()[0]
            else:
                match = columnpattern.match(line)
                if not match:
                    raise ValueError("can not split line at %r" % (line,))
                value = match.groups()[0]
                if tofloat:
                    try:
                        value = float(value)
                    except (TypeError, ValueError):
                        pass
            if not match.end():
                # an empty match would never shorten the line
                raise ValueError("empty match while splitting line at %r" % (line,))
            result.append(value)
            line = line[match.end():]
        return result

    def __init__(self, file, commentpattern=defaultcommentpattern,
                             stringpattern=defaultstringpattern,
                             columnpattern=defaultcolumnpattern,
                             skiphead=0, skiptail=0, every=1, **kwargs):
        """read data from a file
        - file might either be a string or a file instance (something, that
          provides readlines()); a string is used as a filename and the file
          is closed after reading, a file instance is left open
        - each non-empty line, which does not match the commentpattern, is
          considered to be a data row; columns are extracted by the splitline
          method using tofloat=1
        - the last line before a data line matching the commentpattern and
          containing further characters is considered as the title line;
          the title list is extracted by the splitline method using tofloat=0
        - the first skiphead data lines are skipped
        - the last skiptail data lines are skipped
        - only every "every" data line is used (starting at the skiphead + 1 line)
        - the number of the data line (starting at 1) is inserted as the
          first column of each row; its title is None
        - the number of columns is equalized between data and titles like
          in the data constructor without setting maxcolumns
        - further keyword arguments are passed to the constructor of data,
          keyword arguments data, titles, and maxcolumns excluded"""
        if helper.isstring(file):
            file = open(file, "r")
            try:
                lines = file.readlines()
            finally:
                file.close()
        else:
            lines = file.readlines()
        usetitles = []
        usedata = []
        linenumber = 0 # counts the data lines only
        for line in lines:
            line = line.strip()
            match = commentpattern.match(line)
            if match:
                # comment lines might provide titles until data was stored
                if not len(usedata):
                    newtitles = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
                    if len(newtitles):
                        usetitles = newtitles
            else:
                linedata = list(self.splitline(line, stringpattern, columnpattern, tofloat=1))
                if len(linedata):
                    if linenumber >= skiphead and not ((linenumber - skiphead) % every):
                        usedata.append([linenumber + 1] + linedata)
                    linenumber += 1
        if skiptail:
            del usedata[-skiptail:]
        # the column number is taken from the remaining rows only
        maxcolumns = 0
        for linedata in usedata:
            if len(linedata) > maxcolumns:
                maxcolumns = len(linedata)
        data.__init__(self, data=usedata, titles=[None] + usetitles, maxcolumns=maxcolumns, **kwargs)
class sectionfile(_data):

    "an implementation of _Idata reading data from a config-like file"

    __implements__ = _Idata

    def __init__(self, file, sectionstr = "section", **kwargs):
        """read data from a config-like file
        - file might either be a string or a file instance (something, that
          is valid in config.readfp()); a string is used as a filename and
          the file is closed after reading, a file instance is left open
        - each row is defined by a section in the config-like file (see
          config module description); the rows are sorted by section name
        - the columns for each row are defined by lines in the section file;
          the title entries are used to identify the columns
        - the first column contains the section names; its title is sectionstr,
          which therefore must not be used as an option name (ValueError)
        - a value is stored as a float when it can be converted, as a
          string otherwise; options missing in a section become None
        - further keyword arguments are passed to the constructor of _data,
          keyword arguments data and titles excluded"""
        config = ConfigParser.ConfigParser()
        # use the option names as they are written in the file
        config.optionxform = str
        if helper.isstring(file):
            file = open(file, "r")
            try:
                config.readfp(file)
            finally:
                file.close()
        else:
            config.readfp(file)
        usedata = []
        usetitles = [sectionstr]
        sections = config.sections()
        sections.sort()
        for section in sections:
            row = [section] + [None] * (len(usetitles) - 1)
            usedata.append(row)
            for option in config.options(section):
                if option == sectionstr:
                    raise ValueError("'%s' is already used as the section identifier" % sectionstr)
                try:
                    index = usetitles.index(option)
                except ValueError:
                    # a new column: extend the titles and all rows read so
                    # far (including the current one)
                    index = len(usetitles)
                    usetitles.append(option)
                    for line in usedata:
                        line.append(None)
                value = config.get(section, option)
                try:
                    row[index] = float(value)
                except (TypeError, ValueError):
                    row[index] = value
        _data.__init__(self, usedata, usetitles, **kwargs)