docutils/docutils/parsers/rst/directives/tables.py

   1 # $Id$
   2 # Authors: David Goodger <goodger@python.org>; David Priest
   3 # Copyright: This module has been placed in the public domain.
   4
   5 """
   6 Directives for table elements.
   7 """
   8
   9 __docformat__ = 'reStructuredText'
  10
  11
  12 import sys
  13 import os.path
  14 import csv
  15
  16 from docutils import io, nodes, statemachine, utils
  17 from docutils.utils import SystemMessagePropagation
  18 from docutils.parsers.rst import Directive
  19 from docutils.parsers.rst import directives
  20
  21
  22 class Table(Directive):
  23
  24     """
  25     Generic table base class.
  26     """
  27
  28     required_arguments = 0
  29     optional_arguments = 1
  30     final_argument_whitespace = True
  31     option_spec = {'class': directives.class_option}
  32     has_content = True
  33
  34     def make_title(self):
  35         if self.arguments:
  36             title_text = self.arguments[0]
  37             text_nodes, messages = self.state.inline_text(title_text,
  38                                                           self.lineno)
  39             title = nodes.title(title_text, '', *text_nodes)
  40         else:
  41             title = None
  42             messages = []
  43         return title, messages
  44
  45     def process_header_option(self):
  46         source = self.state_machine.get_source(self.lineno - 1)
  47         table_head = []
  48         max_header_cols = 0
  49         if 'header' in self.options:   # separate table header in option
  50             rows, max_header_cols = self.parse_csv_data_into_rows(
  51                 self.options['header'].split('\n'), self.HeaderDialect(),
  52                 source)
  53             table_head.extend(rows)
  54         return table_head, max_header_cols
  55
  56     def check_table_dimensions(self, rows, header_rows, stub_columns):
  57         if len(rows) < header_rows:
  58             error = self.state_machine.reporter.error(
  59                 '%s header row(s) specified but only %s row(s) of data '
  60                 'supplied ("%s" directive).'
  61                 % (header_rows, len(rows), self.name), nodes.literal_block(
  62                 self.block_text, self.block_text), line=self.lineno)
  63             raise SystemMessagePropagation(error)
  64         if len(rows) == header_rows > 0:
  65             error = self.state_machine.reporter.error(
  66                 'Insufficient data supplied (%s row(s)); no data remaining '
  67                 'for table body, required by "%s" directive.'
  68                 % (len(rows), self.name), nodes.literal_block(
  69                 self.block_text, self.block_text), line=self.lineno)
  70             raise SystemMessagePropagation(error)
  71         for row in rows:
  72             if len(row) < stub_columns:
  73                 error = self.state_machine.reporter.error(
  74                     '%s stub column(s) specified but only %s columns(s) of '
  75                     'data supplied ("%s" directive).' %
  76                     (stub_columns, len(row), self.name), nodes.literal_block(
  77                     self.block_text, self.block_text), line=self.lineno)
  78                 raise SystemMessagePropagation(error)
  79             if len(row) == stub_columns > 0:
  80                 error = self.state_machine.reporter.error(
  81                     'Insufficient data supplied (%s columns(s)); no data remaining '
  82                     'for table body, required by "%s" directive.'
  83                     % (len(row), self.name), nodes.literal_block(
  84                     self.block_text, self.block_text), line=self.lineno)
  85                 raise SystemMessagePropagation(error)
  86
  87     def get_column_widths(self, max_cols):
  88         if 'widths' in self.options:
  89             col_widths = self.options['widths']
  90             if len(col_widths) != max_cols:
  91                 error = self.state_machine.reporter.error(
  92                     '"%s" widths do not match the number of columns in table '
  93                     '(%s).' % (self.name, max_cols), nodes.literal_block(
  94                     self.block_text, self.block_text), line=self.lineno)
  95                 raise SystemMessagePropagation(error)
  96         elif max_cols:
  97             col_widths = [100 // max_cols] * max_cols
  98         else:
  99             error = self.state_machine.reporter.error(
 100                 'No table data detected in CSV file.', nodes.literal_block(
 101                 self.block_text, self.block_text), line=self.lineno)
 102             raise SystemMessagePropagation(error)
 103         return col_widths
 104
 105     def extend_short_rows_with_empty_cells(self, columns, parts):
 106         for part in parts:
 107             for row in part:
 108                 if len(row) < columns:
 109                     row.extend([(0, 0, 0, [])] * (columns - len(row)))
 110
 111
 112 class RSTTable(Table):
 113
 114     def run(self):
 115         if not self.content:
 116             warning = self.state_machine.reporter.warning(
 117                 'Content block expected for the "%s" directive; none found.'
 118                 % self.name, nodes.literal_block(
 119                 self.block_text, self.block_text), line=self.lineno)
 120             return [warning]
 121         title, messages = self.make_title()
 122         node = nodes.Element()          # anonymous container for parsing
 123         self.state.nested_parse(self.content, self.content_offset, node)
 124         if len(node) != 1 or not isinstance(node[0], nodes.table):
 125             error = self.state_machine.reporter.error(
 126                 'Error parsing content block for the "%s" directive: exactly '
 127                 'one table expected.' % self.name, nodes.literal_block(
 128                 self.block_text, self.block_text), line=self.lineno)
 129             return [error]
 130         table_node = node[0]
 131         table_node['classes'] += self.options.get('class', [])
 132         if title:
 133             table_node.insert(0, title)
 134         return [table_node] + messages
 135
 136
 137 class CSVTable(Table):
 138
 139     option_spec = {'header-rows': directives.nonnegative_int,
 140                    'stub-columns': directives.nonnegative_int,
 141                    'header': directives.unchanged,
 142                    'widths': directives.positive_int_list,
 143                    'file': directives.path,
 144                    'url': directives.uri,
 145                    'encoding': directives.encoding,
 146                    'class': directives.class_option,
 147                    # field delimiter char
 148                    'delim': directives.single_char_or_whitespace_or_unicode,
 149                    # treat whitespace after delimiter as significant
 150                    'keepspace': directives.flag,
 151                    # text field quote/unquote char:
 152                    'quote': directives.single_char_or_unicode,
 153                    # char used to escape delim & quote as-needed:
 154                    'escape': directives.single_char_or_unicode,}
 155
 156     class DocutilsDialect(csv.Dialect):
 157
 158         """CSV dialect for `csv_table` directive."""
 159
 160         delimiter = ','
 161         quotechar = '"'
 162         doublequote = True
 163         skipinitialspace = True
 164         lineterminator = '\n'
 165         quoting = csv.QUOTE_MINIMAL
 166
 167         def __init__(self, options):
 168             if 'delim' in options:
 169                 self.delimiter = str(options['delim'])
 170             if 'keepspace' in options:
 171                 self.skipinitialspace = False
 172             if 'quote' in options:
 173                 self.quotechar = str(options['quote'])
 174             if 'escape' in options:
 175                 self.doublequote = False
 176                 self.escapechar = str(options['escape'])
 177             csv.Dialect.__init__(self)
 178
 179
 180     class HeaderDialect(csv.Dialect):
 181
 182         """CSV dialect to use for the "header" option data."""
 183
 184         delimiter = ','
 185         quotechar = '"'
 186         escapechar = '\\'
 187         doublequote = False
 188         skipinitialspace = True
 189         lineterminator = '\n'
 190         quoting = csv.QUOTE_MINIMAL
 191
 192     def check_requirements(self):
 193         pass
 194
 195     def run(self):
 196         try:
 197             if (not self.state.document.settings.file_insertion_enabled
 198                 and ('file' in self.options
 199                      or 'url' in self.options)):
 200                 warning = self.state_machine.reporter.warning(
 201                     'File and URL access deactivated; ignoring "%s" '
 202                     'directive.' % self.name, nodes.literal_block(
 203                     self.block_text, self.block_text), line=self.lineno)
 204                 return [warning]
 205             self.check_requirements()
 206             title, messages = self.make_title()
 207             csv_data, source = self.get_csv_data()
 208             table_head, max_header_cols = self.process_header_option()
 209             rows, max_cols = self.parse_csv_data_into_rows(
 210                 csv_data, self.DocutilsDialect(self.options), source)
 211             max_cols = max(max_cols, max_header_cols)
 212             header_rows = self.options.get('header-rows', 0)
 213             stub_columns = self.options.get('stub-columns', 0)
 214             self.check_table_dimensions(rows, header_rows, stub_columns)
 215             table_head.extend(rows[:header_rows])
 216             table_body = rows[header_rows:]
 217             col_widths = self.get_column_widths(max_cols)
 218             self.extend_short_rows_with_empty_cells(max_cols,
 219                                                     (table_head, table_body))
 220         except SystemMessagePropagation, detail:
 221             return [detail.args[0]]
 222         except csv.Error, detail:
 223             error = self.state_machine.reporter.error(
 224                 'Error with CSV data in "%s" directive:\n%s'
 225                 % (self.name, detail), nodes.literal_block(
 226                 self.block_text, self.block_text), line=self.lineno)
 227             return [error]
 228         table = (col_widths, table_head, table_body)
 229         table_node = self.state.build_table(table, self.content_offset,
 230                                             stub_columns)
 231         table_node['classes'] += self.options.get('class', [])
 232         if title:
 233             table_node.insert(0, title)
 234         return [table_node] + messages
 235
 236     def get_csv_data(self):
 237         """
 238         Get CSV data from the directive content, from an external
 239         file, or from a URL reference.
 240         """
 241         encoding = self.options.get(
 242             'encoding', self.state.document.settings.input_encoding)
 243         if self.content:
 244             # CSV data is from directive content.
 245             if 'file' in self.options or 'url' in self.options:
 246                 error = self.state_machine.reporter.error(
 247                     '"%s" directive may not both specify an external file and'
 248                     ' have content.' % self.name, nodes.literal_block(
 249                     self.block_text, self.block_text), line=self.lineno)
 250                 raise SystemMessagePropagation(error)
 251             source = self.content.source(0)
 252             csv_data = self.content
 253         elif 'file' in self.options:
 254             # CSV data is from an external file.
 255             if 'url' in self.options:
 256                 error = self.state_machine.reporter.error(
 257                       'The "file" and "url" options may not be simultaneously'
 258                       ' specified for the "%s" directive.' % self.name,
 259                       nodes.literal_block(self.block_text, self.block_text),
 260                       line=self.lineno)
 261                 raise SystemMessagePropagation(error)
 262             source_dir = os.path.dirname(
 263                 os.path.abspath(self.state.document.current_source))
 264             source = os.path.normpath(os.path.join(source_dir,
 265                                                    self.options['file']))
 266             source = utils.relative_path(None, source)
 267             try:
 268                 self.state.document.settings.record_dependencies.add(source)
 269                 csv_file = io.FileInput(
 270                     source_path=source, encoding=encoding,
 271                     error_handler=(self.state.document.settings.\
 272                                    input_encoding_error_handler),
 273                     handle_io_errors=None)
 274                 csv_data = csv_file.read().splitlines()
 275             except IOError, error:
 276                 severe = self.state_machine.reporter.severe(
 277                     'Problems with "%s" directive path:\n%s.'
 278                     % (self.name, error), nodes.literal_block(
 279                     self.block_text, self.block_text), line=self.lineno)
 280                 raise SystemMessagePropagation(severe)
 281         elif 'url' in self.options:
 282             # CSV data is from a URL.
 283             # Do not import urllib2 at the top of the module because
 284             # it may fail due to broken SSL dependencies, and it takes
 285             # about 0.15 seconds to load.
 286             import urllib2
 287             source = self.options['url']
 288             try:
 289                 csv_text = urllib2.urlopen(source).read()
 290             except (urllib2.URLError, IOError, OSError, ValueError), error:
 291                 severe = self.state_machine.reporter.severe(
 292                       'Problems with "%s" directive URL "%s":\n%s.'
 293                       % (self.name, self.options['url'], error),
 294                       nodes.literal_block(self.block_text, self.block_text),
 295                       line=self.lineno)
 296                 raise SystemMessagePropagation(severe)
 297             csv_file = io.StringInput(
 298                 source=csv_text, source_path=source, encoding=encoding,
 299                 error_handler=(self.state.document.settings.\
 300                                input_encoding_error_handler))
 301             csv_data = csv_file.read().splitlines()
 302         else:
 303             error = self.state_machine.reporter.warning(
 304                 'The "%s" directive requires content; none supplied.'
 305                 % self.name, nodes.literal_block(
 306                 self.block_text, self.block_text), line=self.lineno)
 307             raise SystemMessagePropagation(error)
 308         return csv_data, source
 309
 310     if sys.version_info < (3,):
 311         # 2.x csv module doesn't do Unicode
 312         def decode_from_csv(s):
 313             return s.decode('utf-8')
 314         def encode_for_csv(s):
 315             return s.encode('utf-8')
 316     else:
 317         def decode_from_csv(s):
 318             return s
 319         def encode_for_csv(s):
 320             return s
 321     decode_from_csv = staticmethod(decode_from_csv)
 322     encode_for_csv = staticmethod(encode_for_csv)
 323
 324     def parse_csv_data_into_rows(self, csv_data, dialect, source):
 325         # csv.py doesn't do Unicode; encode temporarily as UTF-8
 326         csv_reader = csv.reader([self.encode_for_csv(line + '\n')
 327                                  for line in csv_data],
 328                                 dialect=dialect)
 329         rows = []
 330         max_cols = 0
 331         for row in csv_reader:
 332             row_data = []
 333             for cell in row:
 334                 # decode UTF-8 back to Unicode
 335                 cell_text = self.decode_from_csv(cell)
 336                 cell_data = (0, 0, 0, statemachine.StringList(
 337                     cell_text.splitlines(), source=source))
 338                 row_data.append(cell_data)
 339             rows.append(row_data)
 340             max_cols = max(max_cols, len(row))
 341         return rows, max_cols
 342
 343
 344 class ListTable(Table):
 345
 346     """
 347     Implement tables whose data is encoded as a uniform two-level bullet list.
 348     For further ideas, see
 349     http://docutils.sf.net/docs/dev/rst/alternatives.html#list-driven-tables
 350     """
 351
 352     option_spec = {'header-rows': directives.nonnegative_int,
 353                    'stub-columns': directives.nonnegative_int,
 354                    'widths': directives.positive_int_list,
 355                    'class': directives.class_option}
 356
 357     def run(self):
 358         if not self.content:
 359             error = self.state_machine.reporter.error(
 360                 'The "%s" directive is empty; content required.' % self.name,
 361                 nodes.literal_block(self.block_text, self.block_text),
 362                 line=self.lineno)
 363             return [error]
 364         title, messages = self.make_title()
 365         node = nodes.Element()          # anonymous container for parsing
 366         self.state.nested_parse(self.content, self.content_offset, node)
 367         try:
 368             num_cols, col_widths = self.check_list_content(node)
 369             table_data = [[item.children for item in row_list[0]]
 370                           for row_list in node[0]]
 371             header_rows = self.options.get('header-rows', 0)
 372             stub_columns = self.options.get('stub-columns', 0)
 373             self.check_table_dimensions(table_data, header_rows, stub_columns)
 374         except SystemMessagePropagation, detail:
 375             return [detail.args[0]]
 376         table_node = self.build_table_from_list(table_data, col_widths,
 377                                                 header_rows, stub_columns)
 378         table_node['classes'] += self.options.get('class', [])
 379         if title:
 380             table_node.insert(0, title)
 381         return [table_node] + messages
 382
 383     def check_list_content(self, node):
 384         if len(node) != 1 or not isinstance(node[0], nodes.bullet_list):
 385             error = self.state_machine.reporter.error(
 386                 'Error parsing content block for the "%s" directive: '
 387                 'exactly one bullet list expected.' % self.name,
 388                 nodes.literal_block(self.block_text, self.block_text),
 389                 line=self.lineno)
 390             raise SystemMessagePropagation(error)
 391         list_node = node[0]
 392         # Check for a uniform two-level bullet list:
 393         for item_index in range(len(list_node)):
 394             item = list_node[item_index]
 395             if len(item) != 1 or not isinstance(item[0], nodes.bullet_list):
 396                 error = self.state_machine.reporter.error(
 397                     'Error parsing content block for the "%s" directive: '
 398                     'two-level bullet list expected, but row %s does not '
 399                     'contain a second-level bullet list.'
 400                     % (self.name, item_index + 1), nodes.literal_block(
 401                     self.block_text, self.block_text), line=self.lineno)
 402                 raise SystemMessagePropagation(error)
 403             elif item_index:
 404                 # ATTN pychecker users: num_cols is guaranteed to be set in the
 405                 # "else" clause below for item_index==0, before this branch is
 406                 # triggered.
 407                 if len(item[0]) != num_cols:
 408                     error = self.state_machine.reporter.error(
 409                         'Error parsing content block for the "%s" directive: '
 410                         'uniform two-level bullet list expected, but row %s '
 411                         'does not contain the same number of items as row 1 '
 412                         '(%s vs %s).'
 413                         % (self.name, item_index + 1, len(item[0]), num_cols),
 414                         nodes.literal_block(self.block_text, self.block_text),
 415                         line=self.lineno)
 416                     raise SystemMessagePropagation(error)
 417             else:
 418                 num_cols = len(item[0])
 419         col_widths = self.get_column_widths(num_cols)
 420         return num_cols, col_widths
 421
 422     def build_table_from_list(self, table_data, col_widths, header_rows, stub_columns):
 423         table = nodes.table()
 424         tgroup = nodes.tgroup(cols=len(col_widths))
 425         table += tgroup
 426         for col_width in col_widths:
 427             colspec = nodes.colspec(colwidth=col_width)
 428             if stub_columns:
 429                 colspec.attributes['stub'] = 1
 430                 stub_columns -= 1
 431             tgroup += colspec
 432         rows = []
 433         for row in table_data:
 434             row_node = nodes.row()
 435             for cell in row:
 436                 entry = nodes.entry()
 437                 entry += cell
 438                 row_node += entry
 439             rows.append(row_node)
 440         if header_rows:
 441             thead = nodes.thead()
 442             thead.extend(rows[:header_rows])
 443             tgroup += thead
 444         tbody = nodes.tbody()
 445         tbody.extend(rows[header_rows:])
 446         tgroup += tbody
 447         return table