math, error_reporting, and urischemes moved to the utils package.
[docutils.git] / docutils / parsers / rst / directives / tables.py
blob9ae2365c521d90b3d62ccfc9106faa406cd5ea23
1 # $Id$
2 # Authors: David Goodger <goodger@python.org>; David Priest
3 # Copyright: This module has been placed in the public domain.
5 """
6 Directives for table elements.
7 """
9 __docformat__ = 'reStructuredText'
12 import sys
13 import os.path
14 import csv
16 from docutils import io, nodes, statemachine, utils
17 from docutils.utils.error_reporting import SafeString
18 from docutils.utils import SystemMessagePropagation
19 from docutils.parsers.rst import Directive
20 from docutils.parsers.rst import directives
class Table(Directive):

    """
    Generic table base class.

    Collects the helpers shared by the table directives: title creation,
    processing of the "header" option, dimension checks, column-width
    calculation, and padding of short rows.
    """

    optional_arguments = 1
    final_argument_whitespace = True
    option_spec = {'class': directives.class_option,
                   'name': directives.unchanged}
    has_content = True

    def _propagate_error(self, message):
        """
        Report `message` as an error and abort via an exception.

        The system message carries the directive's source text in a
        literal block and the directive's line number; it is raised
        wrapped in `SystemMessagePropagation`.
        """
        error = self.state_machine.reporter.error(
            message,
            nodes.literal_block(self.block_text, self.block_text),
            line=self.lineno)
        raise SystemMessagePropagation(error)

    def make_title(self):
        """
        Build the optional table title from the first directive argument.

        Return a ``(title, messages)`` pair.  Without an argument, `title`
        is None and `messages` is an empty list.  Otherwise the argument
        is passed through `self.state.inline_text()`; the text nodes it
        returns become the children of a `nodes.title` and the messages
        it returns are handed back unchanged.
        """
        if not self.arguments:
            return None, []
        title_text = self.arguments[0]
        text_nodes, messages = self.state.inline_text(title_text,
                                                      self.lineno)
        return nodes.title(title_text, '', *text_nodes), messages

    def process_header_option(self):
        """
        Parse the "header" option, if given, into table head rows.

        Return ``(table_head, max_header_cols)``; an empty list and 0 when
        the option is absent.  Relies on `parse_csv_data_into_rows()` and
        `HeaderDialect` being available on the instance (`CSVTable`
        defines both).
        """
        source = self.state_machine.get_source(self.lineno - 1)
        table_head = []
        max_header_cols = 0
        if 'header' in self.options:   # separate table header in option
            rows, max_header_cols = self.parse_csv_data_into_rows(
                self.options['header'].split('\n'), self.HeaderDialect(),
                source)
            table_head.extend(rows)
        return table_head, max_header_cols

    def check_table_dimensions(self, rows, header_rows, stub_columns):
        """
        Verify that `rows` leaves room for a table body.

        Raise `SystemMessagePropagation` if there are fewer rows than
        `header_rows`, if the header rows would use up every row, or if
        any row has no more cells than `stub_columns`.
        """
        if len(rows) < header_rows:
            self._propagate_error(
                '%s header row(s) specified but only %s row(s) of data '
                'supplied ("%s" directive).'
                % (header_rows, len(rows), self.name))
        if len(rows) == header_rows > 0:
            self._propagate_error(
                'Insufficient data supplied (%s row(s)); no data remaining '
                'for table body, required by "%s" directive.'
                % (len(rows), self.name))
        for row in rows:
            if len(row) < stub_columns:
                self._propagate_error(
                    '%s stub column(s) specified but only %s columns(s) of '
                    'data supplied ("%s" directive).'
                    % (stub_columns, len(row), self.name))
            if len(row) == stub_columns > 0:
                self._propagate_error(
                    'Insufficient data supplied (%s columns(s)); no data '
                    'remaining for table body, required by "%s" directive.'
                    % (len(row), self.name))

    def get_column_widths(self, max_cols):
        """
        Return the list of column widths for a table of `max_cols` columns.

        The "widths" option is used when present and must then contain
        exactly `max_cols` entries.  Without it, every column gets the
        integer share ``100 // max_cols``.  Raise
        `SystemMessagePropagation` on a length mismatch or when
        `max_cols` is zero.
        """
        if 'widths' in self.options:
            col_widths = self.options['widths']
            if len(col_widths) != max_cols:
                self._propagate_error(
                    '"%s" widths do not match the number of columns in '
                    'table (%s).' % (self.name, max_cols))
        elif max_cols:
            col_widths = [100 // max_cols] * max_cols
        else:
            self._propagate_error('No table data detected in CSV file.')
        return col_widths

    def extend_short_rows_with_empty_cells(self, columns, parts):
        """
        Pad, in place, every row in every part up to `columns` cells.

        `parts` is an iterable of row lists (the caller in `CSVTable`
        passes the table head and the table body).  Filler cells have the
        form ``(0, 0, 0, [])``.
        """
        for part in parts:
            for row in part:
                missing = columns - len(row)
                if missing > 0:
                    # Build each filler cell separately: multiplying a
                    # one-element list would make all padded cells share
                    # a single (mutable) content list.
                    row.extend((0, 0, 0, []) for _ in range(missing))
class RSTTable(Table):

    """
    Directive wrapping a single table given in the content block, adding
    an optional title, classes, and a name to it.
    """

    def run(self):
        """
        Parse the content block and return the resulting table node.

        Return a one-element list holding a warning when there is no
        content, or an error when the content does not parse to exactly
        one table.  Otherwise return the table node followed by any
        system messages produced while building the title.
        """
        if not self.content:
            return [self.state_machine.reporter.warning(
                'Content block expected for the "%s" directive; none found.'
                % self.name,
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)]
        title, messages = self.make_title()
        container = nodes.Element()     # throwaway parent for the parse
        self.state.nested_parse(self.content, self.content_offset,
                                container)
        parsed_one_table = (len(container) == 1
                            and isinstance(container[0], nodes.table))
        if not parsed_one_table:
            return [self.state_machine.reporter.error(
                'Error parsing content block for the "%s" directive: exactly '
                'one table expected.' % self.name,
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)]
        table_node = container[0]
        table_node['classes'] += self.options.get('class', [])
        self.add_name(table_node)
        if title:
            table_node.insert(0, title)
        return [table_node] + messages
class CSVTable(Table):

    """
    Table directive whose rows are supplied as CSV data.

    The data is taken from the directive content, from an external file
    ("file" option), or from a URL ("url" option).
    """

    option_spec = {'header-rows': directives.nonnegative_int,
                   'stub-columns': directives.nonnegative_int,
                   'header': directives.unchanged,
                   'widths': directives.positive_int_list,
                   'file': directives.path,
                   'url': directives.uri,
                   'encoding': directives.encoding,
                   'class': directives.class_option,
                   'name': directives.unchanged,
                   # field delimiter char
                   'delim': directives.single_char_or_whitespace_or_unicode,
                   # treat whitespace after delimiter as significant
                   'keepspace': directives.flag,
                   # text field quote/unquote char:
                   'quote': directives.single_char_or_unicode,
                   # char used to escape delim & quote as-needed:
                   'escape': directives.single_char_or_unicode,}

    class DocutilsDialect(csv.Dialect):

        """CSV dialect for `csv_table` directive."""

        delimiter = ','
        quotechar = '"'
        doublequote = True
        skipinitialspace = True
        lineterminator = '\n'
        quoting = csv.QUOTE_MINIMAL

        def __init__(self, options):
            """
            Override the class-level defaults from the directive `options`.

            Recognized keys: "delim", "keepspace", "quote", and "escape".
            Giving "escape" also switches off quote doubling.
            """
            if 'delim' in options:
                self.delimiter = str(options['delim'])
            if 'keepspace' in options:
                self.skipinitialspace = False
            if 'quote' in options:
                self.quotechar = str(options['quote'])
            if 'escape' in options:
                self.doublequote = False
                self.escapechar = str(options['escape'])
            # Call the base initializer only after the attributes have
            # their final values.
            csv.Dialect.__init__(self)

    class HeaderDialect(csv.Dialect):

        """CSV dialect to use for the "header" option data."""

        delimiter = ','
        quotechar = '"'
        escapechar = '\\'
        doublequote = False
        skipinitialspace = True
        lineterminator = '\n'
        quoting = csv.QUOTE_MINIMAL

    def check_requirements(self):
        """Hook called at the start of `run()`; does nothing here."""
        pass

    def run(self):
        """
        Build the table node from the CSV data.

        Return a list with the table node followed by any title messages.
        If file insertion is disabled and "file" or "url" is given, or if
        a helper raises `SystemMessagePropagation` or the `csv` module
        raises `csv.Error`, return a one-element list with the
        corresponding system message instead.
        """
        try:
            if (not self.state.document.settings.file_insertion_enabled
                and ('file' in self.options
                     or 'url' in self.options)):
                warning = self.state_machine.reporter.warning(
                    'File and URL access deactivated; ignoring "%s" '
                    'directive.' % self.name, nodes.literal_block(
                    self.block_text, self.block_text), line=self.lineno)
                return [warning]
            self.check_requirements()
            title, messages = self.make_title()
            csv_data, source = self.get_csv_data()
            table_head, max_header_cols = self.process_header_option()
            rows, max_cols = self.parse_csv_data_into_rows(
                csv_data, self.DocutilsDialect(self.options), source)
            max_cols = max(max_cols, max_header_cols)
            header_rows = self.options.get('header-rows', 0)
            stub_columns = self.options.get('stub-columns', 0)
            self.check_table_dimensions(rows, header_rows, stub_columns)
            # Leading data rows join any rows from the "header" option.
            table_head.extend(rows[:header_rows])
            table_body = rows[header_rows:]
            col_widths = self.get_column_widths(max_cols)
            self.extend_short_rows_with_empty_cells(max_cols,
                                                    (table_head, table_body))
        except SystemMessagePropagation as detail:
            return [detail.args[0]]
        except csv.Error as detail:
            error = self.state_machine.reporter.error(
                'Error with CSV data in "%s" directive:\n%s'
                % (self.name, detail), nodes.literal_block(
                self.block_text, self.block_text), line=self.lineno)
            return [error]
        table = (col_widths, table_head, table_body)
        table_node = self.state.build_table(table, self.content_offset,
                                            stub_columns)
        table_node['classes'] += self.options.get('class', [])
        self.add_name(table_node)
        if title:
            table_node.insert(0, title)
        return [table_node] + messages

    def get_csv_data(self):
        """
        Get CSV data from the directive content, from an external
        file, or from a URL reference.

        Return ``(csv_data, source)``: the data as a sequence of lines and
        the source it came from.  Raise `SystemMessagePropagation` when
        the data sources conflict, when none is given, or when reading
        the file or URL fails.
        """
        encoding = self.options.get(
            'encoding', self.state.document.settings.input_encoding)
        if self.content:
            # CSV data is from directive content.
            if 'file' in self.options or 'url' in self.options:
                error = self.state_machine.reporter.error(
                    '"%s" directive may not both specify an external file and'
                    ' have content.' % self.name, nodes.literal_block(
                    self.block_text, self.block_text), line=self.lineno)
                raise SystemMessagePropagation(error)
            source = self.content.source(0)
            csv_data = self.content
        elif 'file' in self.options:
            # CSV data is from an external file.
            if 'url' in self.options:
                error = self.state_machine.reporter.error(
                    'The "file" and "url" options may not be simultaneously'
                    ' specified for the "%s" directive.' % self.name,
                    nodes.literal_block(self.block_text, self.block_text),
                    line=self.lineno)
                raise SystemMessagePropagation(error)
            # Resolve the path relative to the current source document.
            source_dir = os.path.dirname(
                os.path.abspath(self.state.document.current_source))
            source = os.path.normpath(os.path.join(source_dir,
                                                   self.options['file']))
            source = utils.relative_path(None, source)
            try:
                self.state.document.settings.record_dependencies.add(source)
                csv_file = io.FileInput(
                    source_path=source, encoding=encoding,
                    error_handler=(self.state.document.settings
                                   .input_encoding_error_handler),
                    handle_io_errors=None)
                csv_data = csv_file.read().splitlines()
            except IOError as error:
                severe = self.state_machine.reporter.severe(
                    u'Problems with "%s" directive path:\n%s.'
                    % (self.name, SafeString(error)),
                    nodes.literal_block(self.block_text, self.block_text),
                    line=self.lineno)
                raise SystemMessagePropagation(severe)
        elif 'url' in self.options:
            # CSV data is from a URL.
            # Do not import the URL library at the top of the module
            # because it may fail due to broken SSL dependencies, and it
            # takes about 0.15 seconds to load.
            if sys.version_info < (3,):
                from urllib2 import urlopen, URLError
            else:
                from urllib.request import urlopen
                from urllib.error import URLError
            source = self.options['url']
            try:
                response = urlopen(source)
                try:
                    csv_text = response.read()
                finally:
                    # Release the connection even if reading fails.
                    response.close()
            except (URLError, IOError, OSError, ValueError) as error:
                severe = self.state_machine.reporter.severe(
                      'Problems with "%s" directive URL "%s":\n%s.'
                      % (self.name, self.options['url'], SafeString(error)),
                      nodes.literal_block(self.block_text, self.block_text),
                      line=self.lineno)
                raise SystemMessagePropagation(severe)
            csv_file = io.StringInput(
                source=csv_text, source_path=source, encoding=encoding,
                error_handler=(self.state.document.settings
                               .input_encoding_error_handler))
            csv_data = csv_file.read().splitlines()
        else:
            # Missing data is reported at warning level.
            error = self.state_machine.reporter.warning(
                'The "%s" directive requires content; none supplied.'
                % self.name, nodes.literal_block(
                self.block_text, self.block_text), line=self.lineno)
            raise SystemMessagePropagation(error)
        return csv_data, source

    if sys.version_info < (3,):
        # 2.x csv module doesn't do Unicode
        def decode_from_csv(s):
            return s.decode('utf-8')
        def encode_for_csv(s):
            return s.encode('utf-8')
    else:
        def decode_from_csv(s):
            return s
        def encode_for_csv(s):
            return s
    decode_from_csv = staticmethod(decode_from_csv)
    encode_for_csv = staticmethod(encode_for_csv)

    def parse_csv_data_into_rows(self, csv_data, dialect, source):
        """
        Parse the lines in `csv_data` using `dialect`.

        Return ``(rows, max_cols)``.  Each row is a list of cells of the
        form ``(0, 0, 0, StringList)``, where the `StringList` holds the
        cell's text lines tagged with `source`.  `max_cols` is the cell
        count of the longest row (0 if there are no rows).
        """
        # csv.py doesn't do Unicode; encode temporarily as UTF-8
        csv_reader = csv.reader([self.encode_for_csv(line + '\n')
                                 for line in csv_data],
                                dialect=dialect)
        rows = []
        max_cols = 0
        for row in csv_reader:
            row_data = []
            for cell in row:
                # decode UTF-8 back to Unicode
                cell_text = self.decode_from_csv(cell)
                cell_data = (0, 0, 0, statemachine.StringList(
                    cell_text.splitlines(), source=source))
                row_data.append(cell_data)
            rows.append(row_data)
            max_cols = max(max_cols, len(row))
        return rows, max_cols
class ListTable(Table):

    """
    Implement tables whose data is encoded as a uniform two-level bullet list.
    For further ideas, see
    http://docutils.sf.net/docs/dev/rst/alternatives.html#list-driven-tables
    """

    option_spec = {'header-rows': directives.nonnegative_int,
                   'stub-columns': directives.nonnegative_int,
                   'widths': directives.positive_int_list,
                   'class': directives.class_option,
                   'name': directives.unchanged}

    def run(self):
        """
        Build the table node from the bullet list in the content block.

        Return a list with the table node followed by any title messages.
        Return a one-element list with an error message instead when the
        content is empty or a check raises `SystemMessagePropagation`.
        """
        if not self.content:
            error = self.state_machine.reporter.error(
                'The "%s" directive is empty; content required.' % self.name,
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)
            return [error]
        title, messages = self.make_title()
        node = nodes.Element()          # anonymous container for parsing
        self.state.nested_parse(self.content, self.content_offset, node)
        try:
            num_cols, col_widths = self.check_list_content(node)
            # One entry per row; each entry lists the children of every
            # second-level list item in that row.
            table_data = [[item.children for item in row_list[0]]
                          for row_list in node[0]]
            header_rows = self.options.get('header-rows', 0)
            stub_columns = self.options.get('stub-columns', 0)
            self.check_table_dimensions(table_data, header_rows,
                                        stub_columns)
        except SystemMessagePropagation as detail:
            return [detail.args[0]]
        table_node = self.build_table_from_list(table_data, col_widths,
                                                header_rows, stub_columns)
        table_node['classes'] += self.options.get('class', [])
        self.add_name(table_node)
        if title:
            table_node.insert(0, title)
        return [table_node] + messages

    def check_list_content(self, node):
        """
        Verify that `node` holds exactly one uniform two-level bullet list.

        Return ``(num_cols, col_widths)``, where `num_cols` is the number
        of items in the first row's second-level list.  Raise
        `SystemMessagePropagation` if `node` does not contain exactly one
        bullet list, if a row is not a single second-level bullet list,
        or if a row's item count differs from that of row 1.
        """
        if len(node) != 1 or not isinstance(node[0], nodes.bullet_list):
            error = self.state_machine.reporter.error(
                'Error parsing content block for the "%s" directive: '
                'exactly one bullet list expected.' % self.name,
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)
            raise SystemMessagePropagation(error)
        list_node = node[0]
        # Start from 0 so that a list without items reaches
        # get_column_widths() instead of failing on an unbound name.
        num_cols = 0
        # Check for a uniform two-level bullet list:
        for item_index, item in enumerate(list_node):
            if len(item) != 1 or not isinstance(item[0], nodes.bullet_list):
                error = self.state_machine.reporter.error(
                    'Error parsing content block for the "%s" directive: '
                    'two-level bullet list expected, but row %s does not '
                    'contain a second-level bullet list.'
                    % (self.name, item_index + 1), nodes.literal_block(
                    self.block_text, self.block_text), line=self.lineno)
                raise SystemMessagePropagation(error)
            elif item_index:
                # Every later row must match the width of the first row.
                if len(item[0]) != num_cols:
                    error = self.state_machine.reporter.error(
                        'Error parsing content block for the "%s" directive: '
                        'uniform two-level bullet list expected, but row %s '
                        'does not contain the same number of items as row 1 '
                        '(%s vs %s).'
                        % (self.name, item_index + 1, len(item[0]), num_cols),
                        nodes.literal_block(self.block_text, self.block_text),
                        line=self.lineno)
                    raise SystemMessagePropagation(error)
            else:
                # The first row defines the number of columns.
                num_cols = len(item[0])
        col_widths = self.get_column_widths(num_cols)
        return num_cols, col_widths

    def build_table_from_list(self, table_data, col_widths, header_rows,
                              stub_columns):
        """
        Assemble and return a `nodes.table` from the prepared list data.

        `table_data` is a list of rows, each a list of cell contents;
        `col_widths` gives one width per column.  The first `stub_columns`
        column specifications are marked as stubs, and the first
        `header_rows` rows go into a `thead` (omitted when `header_rows`
        is 0); the remaining rows form the `tbody`.
        """
        table = nodes.table()
        tgroup = nodes.tgroup(cols=len(col_widths))
        table += tgroup
        for col_index, col_width in enumerate(col_widths):
            colspec = nodes.colspec(colwidth=col_width)
            if col_index < stub_columns:
                colspec.attributes['stub'] = 1
            tgroup += colspec
        rows = []
        for row in table_data:
            row_node = nodes.row()
            for cell in row:
                entry = nodes.entry()
                entry += cell
                row_node += entry
            rows.append(row_node)
        if header_rows:
            thead = nodes.thead()
            thead.extend(rows[:header_rows])
            tgroup += thead
        tbody = nodes.tbody()
        tbody.extend(rows[header_rows:])
        tgroup += tbody
        return table