docutils/parsers/rst/directives/tables.py

   1 # $Id$
   2 # Authors: David Goodger <goodger@python.org>; David Priest
   3 # Copyright: This module has been placed in the public domain.
   4
   5 """
   6 Directives for table elements.
   7 """
   8
   9 __docformat__ = 'reStructuredText'
  10
  11
  12 import sys
  13 import os.path
  14 import csv
  15
  16 from docutils import io, nodes, statemachine, utils
  17 from docutils.error_reporting import SafeString
  18 from docutils.utils import SystemMessagePropagation
  19 from docutils.parsers.rst import Directive
  20 from docutils.parsers.rst import directives
  21
  22
  23 class Table(Directive):
  24
  25     """
  26     Generic table base class.
  27     """
  28
  29     required_arguments = 0
  30     optional_arguments = 1
  31     final_argument_whitespace = True
  32     option_spec = {'class': directives.class_option,
  33                    'name': directives.unchanged}
  34     has_content = True
  35
  36     def make_title(self):
  37         if self.arguments:
  38             title_text = self.arguments[0]
  39             text_nodes, messages = self.state.inline_text(title_text,
  40                                                           self.lineno)
  41             title = nodes.title(title_text, '', *text_nodes)
  42         else:
  43             title = None
  44             messages = []
  45         return title, messages
  46
  47     def process_header_option(self):
  48         source = self.state_machine.get_source(self.lineno - 1)
  49         table_head = []
  50         max_header_cols = 0
  51         if 'header' in self.options:   # separate table header in option
  52             rows, max_header_cols = self.parse_csv_data_into_rows(
  53                 self.options['header'].split('\n'), self.HeaderDialect(),
  54                 source)
  55             table_head.extend(rows)
  56         return table_head, max_header_cols
  57
  58     def check_table_dimensions(self, rows, header_rows, stub_columns):
  59         if len(rows) < header_rows:
  60             error = self.state_machine.reporter.error(
  61                 '%s header row(s) specified but only %s row(s) of data '
  62                 'supplied ("%s" directive).'
  63                 % (header_rows, len(rows), self.name), nodes.literal_block(
  64                 self.block_text, self.block_text), line=self.lineno)
  65             raise SystemMessagePropagation(error)
  66         if len(rows) == header_rows > 0:
  67             error = self.state_machine.reporter.error(
  68                 'Insufficient data supplied (%s row(s)); no data remaining '
  69                 'for table body, required by "%s" directive.'
  70                 % (len(rows), self.name), nodes.literal_block(
  71                 self.block_text, self.block_text), line=self.lineno)
  72             raise SystemMessagePropagation(error)
  73         for row in rows:
  74             if len(row) < stub_columns:
  75                 error = self.state_machine.reporter.error(
  76                     '%s stub column(s) specified but only %s columns(s) of '
  77                     'data supplied ("%s" directive).' %
  78                     (stub_columns, len(row), self.name), nodes.literal_block(
  79                     self.block_text, self.block_text), line=self.lineno)
  80                 raise SystemMessagePropagation(error)
  81             if len(row) == stub_columns > 0:
  82                 error = self.state_machine.reporter.error(
  83                     'Insufficient data supplied (%s columns(s)); no data remaining '
  84                     'for table body, required by "%s" directive.'
  85                     % (len(row), self.name), nodes.literal_block(
  86                     self.block_text, self.block_text), line=self.lineno)
  87                 raise SystemMessagePropagation(error)
  88
  89     def get_column_widths(self, max_cols):
  90         if 'widths' in self.options:
  91             col_widths = self.options['widths']
  92             if len(col_widths) != max_cols:
  93                 error = self.state_machine.reporter.error(
  94                     '"%s" widths do not match the number of columns in table '
  95                     '(%s).' % (self.name, max_cols), nodes.literal_block(
  96                     self.block_text, self.block_text), line=self.lineno)
  97                 raise SystemMessagePropagation(error)
  98         elif max_cols:
  99             col_widths = [100 // max_cols] * max_cols
 100         else:
 101             error = self.state_machine.reporter.error(
 102                 'No table data detected in CSV file.', nodes.literal_block(
 103                 self.block_text, self.block_text), line=self.lineno)
 104             raise SystemMessagePropagation(error)
 105         return col_widths
 106
 107     def extend_short_rows_with_empty_cells(self, columns, parts):
 108         for part in parts:
 109             for row in part:
 110                 if len(row) < columns:
 111                     row.extend([(0, 0, 0, [])] * (columns - len(row)))
 112
 113
 114 class RSTTable(Table):
 115
 116     def run(self):
 117         if not self.content:
 118             warning = self.state_machine.reporter.warning(
 119                 'Content block expected for the "%s" directive; none found.'
 120                 % self.name, nodes.literal_block(
 121                 self.block_text, self.block_text), line=self.lineno)
 122             return [warning]
 123         title, messages = self.make_title()
 124         node = nodes.Element()          # anonymous container for parsing
 125         self.state.nested_parse(self.content, self.content_offset, node)
 126         if len(node) != 1 or not isinstance(node[0], nodes.table):
 127             error = self.state_machine.reporter.error(
 128                 'Error parsing content block for the "%s" directive: exactly '
 129                 'one table expected.' % self.name, nodes.literal_block(
 130                 self.block_text, self.block_text), line=self.lineno)
 131             return [error]
 132         table_node = node[0]
 133         table_node['classes'] += self.options.get('class', [])
 134         self.add_name(table_node)
 135         if title:
 136             table_node.insert(0, title)
 137         return [table_node] + messages
 138
 139
 140 class CSVTable(Table):
 141
 142     option_spec = {'header-rows': directives.nonnegative_int,
 143                    'stub-columns': directives.nonnegative_int,
 144                    'header': directives.unchanged,
 145                    'widths': directives.positive_int_list,
 146                    'file': directives.path,
 147                    'url': directives.uri,
 148                    'encoding': directives.encoding,
 149                    'class': directives.class_option,
 150                    'name': directives.unchanged,
 151                    # field delimiter char
 152                    'delim': directives.single_char_or_whitespace_or_unicode,
 153                    # treat whitespace after delimiter as significant
 154                    'keepspace': directives.flag,
 155                    # text field quote/unquote char:
 156                    'quote': directives.single_char_or_unicode,
 157                    # char used to escape delim & quote as-needed:
 158                    'escape': directives.single_char_or_unicode,}
 159
 160     class DocutilsDialect(csv.Dialect):
 161
 162         """CSV dialect for `csv_table` directive."""
 163
 164         delimiter = ','
 165         quotechar = '"'
 166         doublequote = True
 167         skipinitialspace = True
 168         lineterminator = '\n'
 169         quoting = csv.QUOTE_MINIMAL
 170
 171         def __init__(self, options):
 172             if 'delim' in options:
 173                 self.delimiter = str(options['delim'])
 174             if 'keepspace' in options:
 175                 self.skipinitialspace = False
 176             if 'quote' in options:
 177                 self.quotechar = str(options['quote'])
 178             if 'escape' in options:
 179                 self.doublequote = False
 180                 self.escapechar = str(options['escape'])
 181             csv.Dialect.__init__(self)
 182
 183
 184     class HeaderDialect(csv.Dialect):
 185
 186         """CSV dialect to use for the "header" option data."""
 187
 188         delimiter = ','
 189         quotechar = '"'
 190         escapechar = '\\'
 191         doublequote = False
 192         skipinitialspace = True
 193         lineterminator = '\n'
 194         quoting = csv.QUOTE_MINIMAL
 195
 196     def check_requirements(self):
 197         pass
 198
 199     def run(self):
 200         try:
 201             if (not self.state.document.settings.file_insertion_enabled
 202                 and ('file' in self.options
 203                      or 'url' in self.options)):
 204                 warning = self.state_machine.reporter.warning(
 205                     'File and URL access deactivated; ignoring "%s" '
 206                     'directive.' % self.name, nodes.literal_block(
 207                     self.block_text, self.block_text), line=self.lineno)
 208                 return [warning]
 209             self.check_requirements()
 210             title, messages = self.make_title()
 211             csv_data, source = self.get_csv_data()
 212             table_head, max_header_cols = self.process_header_option()
 213             rows, max_cols = self.parse_csv_data_into_rows(
 214                 csv_data, self.DocutilsDialect(self.options), source)
 215             max_cols = max(max_cols, max_header_cols)
 216             header_rows = self.options.get('header-rows', 0)
 217             stub_columns = self.options.get('stub-columns', 0)
 218             self.check_table_dimensions(rows, header_rows, stub_columns)
 219             table_head.extend(rows[:header_rows])
 220             table_body = rows[header_rows:]
 221             col_widths = self.get_column_widths(max_cols)
 222             self.extend_short_rows_with_empty_cells(max_cols,
 223                                                     (table_head, table_body))
 224         except SystemMessagePropagation, detail:
 225             return [detail.args[0]]
 226         except csv.Error, detail:
 227             error = self.state_machine.reporter.error(
 228                 'Error with CSV data in "%s" directive:\n%s'
 229                 % (self.name, detail), nodes.literal_block(
 230                 self.block_text, self.block_text), line=self.lineno)
 231             return [error]
 232         table = (col_widths, table_head, table_body)
 233         table_node = self.state.build_table(table, self.content_offset,
 234                                             stub_columns)
 235         table_node['classes'] += self.options.get('class', [])
 236         self.add_name(table_node)
 237         if title:
 238             table_node.insert(0, title)
 239         return [table_node] + messages
 240
 241     def get_csv_data(self):
 242         """
 243         Get CSV data from the directive content, from an external
 244         file, or from a URL reference.
 245         """
 246         encoding = self.options.get(
 247             'encoding', self.state.document.settings.input_encoding)
 248         if self.content:
 249             # CSV data is from directive content.
 250             if 'file' in self.options or 'url' in self.options:
 251                 error = self.state_machine.reporter.error(
 252                     '"%s" directive may not both specify an external file and'
 253                     ' have content.' % self.name, nodes.literal_block(
 254                     self.block_text, self.block_text), line=self.lineno)
 255                 raise SystemMessagePropagation(error)
 256             source = self.content.source(0)
 257             csv_data = self.content
 258         elif 'file' in self.options:
 259             # CSV data is from an external file.
 260             if 'url' in self.options:
 261                 error = self.state_machine.reporter.error(
 262                       'The "file" and "url" options may not be simultaneously'
 263                       ' specified for the "%s" directive.' % self.name,
 264                       nodes.literal_block(self.block_text, self.block_text),
 265                       line=self.lineno)
 266                 raise SystemMessagePropagation(error)
 267             source_dir = os.path.dirname(
 268                 os.path.abspath(self.state.document.current_source))
 269             source = os.path.normpath(os.path.join(source_dir,
 270                                                    self.options['file']))
 271             source = utils.relative_path(None, source)
 272             try:
 273                 self.state.document.settings.record_dependencies.add(source)
 274                 csv_file = io.FileInput(
 275                     source_path=source, encoding=encoding,
 276                     error_handler=(self.state.document.settings.\
 277                                    input_encoding_error_handler),
 278                     handle_io_errors=None)
 279                 csv_data = csv_file.read().splitlines()
 280             except IOError, error:
 281                 severe = self.state_machine.reporter.severe(
 282                     u'Problems with "%s" directive path:\n%s.'
 283                     % (self.name, SafeString(error)),
 284                     nodes.literal_block(self.block_text, self.block_text),
 285                     line=self.lineno)
 286                 raise SystemMessagePropagation(severe)
 287         elif 'url' in self.options:
 288             # CSV data is from a URL.
 289             # Do not import urllib2 at the top of the module because
 290             # it may fail due to broken SSL dependencies, and it takes
 291             # about 0.15 seconds to load.
 292             import urllib2
 293             source = self.options['url']
 294             try:
 295                 csv_text = urllib2.urlopen(source).read()
 296             except (urllib2.URLError, IOError, OSError, ValueError), error:
 297                 severe = self.state_machine.reporter.severe(
 298                       'Problems with "%s" directive URL "%s":\n%s.'
 299                       % (self.name, self.options['url'], SafeString(error)),
 300                       nodes.literal_block(self.block_text, self.block_text),
 301                       line=self.lineno)
 302                 raise SystemMessagePropagation(severe)
 303             csv_file = io.StringInput(
 304                 source=csv_text, source_path=source, encoding=encoding,
 305                 error_handler=(self.state.document.settings.\
 306                                input_encoding_error_handler))
 307             csv_data = csv_file.read().splitlines()
 308         else:
 309             error = self.state_machine.reporter.warning(
 310                 'The "%s" directive requires content; none supplied.'
 311                 % self.name, nodes.literal_block(
 312                 self.block_text, self.block_text), line=self.lineno)
 313             raise SystemMessagePropagation(error)
 314         return csv_data, source
 315
 316     if sys.version_info < (3,):
 317         # 2.x csv module doesn't do Unicode
 318         def decode_from_csv(s):
 319             return s.decode('utf-8')
 320         def encode_for_csv(s):
 321             return s.encode('utf-8')
 322     else:
 323         def decode_from_csv(s):
 324             return s
 325         def encode_for_csv(s):
 326             return s
 327     decode_from_csv = staticmethod(decode_from_csv)
 328     encode_for_csv = staticmethod(encode_for_csv)
 329
 330     def parse_csv_data_into_rows(self, csv_data, dialect, source):
 331         # csv.py doesn't do Unicode; encode temporarily as UTF-8
 332         csv_reader = csv.reader([self.encode_for_csv(line + '\n')
 333                                  for line in csv_data],
 334                                 dialect=dialect)
 335         rows = []
 336         max_cols = 0
 337         for row in csv_reader:
 338             row_data = []
 339             for cell in row:
 340                 # decode UTF-8 back to Unicode
 341                 cell_text = self.decode_from_csv(cell)
 342                 cell_data = (0, 0, 0, statemachine.StringList(
 343                     cell_text.splitlines(), source=source))
 344                 row_data.append(cell_data)
 345             rows.append(row_data)
 346             max_cols = max(max_cols, len(row))
 347         return rows, max_cols
 348
 349
 350 class ListTable(Table):
 351
 352     """
 353     Implement tables whose data is encoded as a uniform two-level bullet list.
 354     For further ideas, see
 355     http://docutils.sf.net/docs/dev/rst/alternatives.html#list-driven-tables
 356     """
 357
 358     option_spec = {'header-rows': directives.nonnegative_int,
 359                    'stub-columns': directives.nonnegative_int,
 360                    'widths': directives.positive_int_list,
 361                    'class': directives.class_option,
 362                    'name': directives.unchanged}
 363
 364     def run(self):
 365         if not self.content:
 366             error = self.state_machine.reporter.error(
 367                 'The "%s" directive is empty; content required.' % self.name,
 368                 nodes.literal_block(self.block_text, self.block_text),
 369                 line=self.lineno)
 370             return [error]
 371         title, messages = self.make_title()
 372         node = nodes.Element()          # anonymous container for parsing
 373         self.state.nested_parse(self.content, self.content_offset, node)
 374         try:
 375             num_cols, col_widths = self.check_list_content(node)
 376             table_data = [[item.children for item in row_list[0]]
 377                           for row_list in node[0]]
 378             header_rows = self.options.get('header-rows', 0)
 379             stub_columns = self.options.get('stub-columns', 0)
 380             self.check_table_dimensions(table_data, header_rows, stub_columns)
 381         except SystemMessagePropagation, detail:
 382             return [detail.args[0]]
 383         table_node = self.build_table_from_list(table_data, col_widths,
 384                                                 header_rows, stub_columns)
 385         table_node['classes'] += self.options.get('class', [])
 386         self.add_name(table_node)
 387         if title:
 388             table_node.insert(0, title)
 389         return [table_node] + messages
 390
 391     def check_list_content(self, node):
 392         if len(node) != 1 or not isinstance(node[0], nodes.bullet_list):
 393             error = self.state_machine.reporter.error(
 394                 'Error parsing content block for the "%s" directive: '
 395                 'exactly one bullet list expected.' % self.name,
 396                 nodes.literal_block(self.block_text, self.block_text),
 397                 line=self.lineno)
 398             raise SystemMessagePropagation(error)
 399         list_node = node[0]
 400         # Check for a uniform two-level bullet list:
 401         for item_index in range(len(list_node)):
 402             item = list_node[item_index]
 403             if len(item) != 1 or not isinstance(item[0], nodes.bullet_list):
 404                 error = self.state_machine.reporter.error(
 405                     'Error parsing content block for the "%s" directive: '
 406                     'two-level bullet list expected, but row %s does not '
 407                     'contain a second-level bullet list.'
 408                     % (self.name, item_index + 1), nodes.literal_block(
 409                     self.block_text, self.block_text), line=self.lineno)
 410                 raise SystemMessagePropagation(error)
 411             elif item_index:
 412                 # ATTN pychecker users: num_cols is guaranteed to be set in the
 413                 # "else" clause below for item_index==0, before this branch is
 414                 # triggered.
 415                 if len(item[0]) != num_cols:
 416                     error = self.state_machine.reporter.error(
 417                         'Error parsing content block for the "%s" directive: '
 418                         'uniform two-level bullet list expected, but row %s '
 419                         'does not contain the same number of items as row 1 '
 420                         '(%s vs %s).'
 421                         % (self.name, item_index + 1, len(item[0]), num_cols),
 422                         nodes.literal_block(self.block_text, self.block_text),
 423                         line=self.lineno)
 424                     raise SystemMessagePropagation(error)
 425             else:
 426                 num_cols = len(item[0])
 427         col_widths = self.get_column_widths(num_cols)
 428         return num_cols, col_widths
 429
 430     def build_table_from_list(self, table_data, col_widths, header_rows, stub_columns):
 431         table = nodes.table()
 432         tgroup = nodes.tgroup(cols=len(col_widths))
 433         table += tgroup
 434         for col_width in col_widths:
 435             colspec = nodes.colspec(colwidth=col_width)
 436             if stub_columns:
 437                 colspec.attributes['stub'] = 1
 438                 stub_columns -= 1
 439             tgroup += colspec
 440         rows = []
 441         for row in table_data:
 442             row_node = nodes.row()
 443             for cell in row:
 444                 entry = nodes.entry()
 445                 entry += cell
 446                 row_node += entry
 447             rows.append(row_node)
 448         if header_rows:
 449             thead = nodes.thead()
 450             thead.extend(rows[:header_rows])
 451             tgroup += thead
 452         tbody = nodes.tbody()
 453         tbody.extend(rows[header_rows:])
 454         tgroup += tbody
 455         return table