Put leavevmode before longtable to avoid having it moved before sub/paragraph.
[docutils.git] / docutils / io.py
blobdcce3532430f6b240e684bda6b19deb3540cea09
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 I/O classes provide a uniform API for low-level input and output. Subclasses
7 will exist for a variety of input/output mechanisms.
8 """
10 __docformat__ = 'reStructuredText'
12 import sys
13 try:
14 import locale
15 except:
16 pass
17 import re
18 from types import UnicodeType
19 from docutils import TransformSpec
class Input(TransformSpec):

    """
    Abstract base class for input wrappers.
    """

    component_type = 'input'

    default_source_path = None

    def __init__(self, source=None, source_path=None, encoding=None,
                 error_handler='strict'):
        self.encoding = encoding
        """Text encoding for the input source."""

        self.error_handler = error_handler
        """Text decoding error handler."""

        self.source = source
        """The source of input data."""

        # A missing or empty path falls back to the class-level default.
        self.source_path = source_path or self.default_source_path
        """A text reference to the source."""

        self.successful_encoding = None
        """The encoding that successfully decoded the source data."""

    def __repr__(self):
        fields = (self.__class__, self.source, self.source_path)
        return '%s: source=%r, source_path=%r' % fields

    def read(self):
        """Return the input data.  Subclasses must override this."""
        raise NotImplementedError

    def decode(self, data):
        """
        Decode a string, `data`, heuristically.
        Raise UnicodeError if unsuccessful.

        Unicode input is returned unchanged.  Otherwise the candidate
        encodings are, in order of precedence: the explicitly configured
        `self.encoding`; an encoding announced by the data itself (see
        `determine_encoding_from_data`); and finally UTF-8, the locale
        encodings and Latin-1.  The first encoding that decodes `data`
        is recorded in `self.successful_encoding`.

        The client application should call ``locale.setlocale`` at the
        beginning of processing::

            locale.setlocale(locale.LC_ALL, '')
        """
        is_unicode = isinstance(data, UnicodeType)
        if self.encoding and self.encoding.lower() == 'unicode':
            assert is_unicode, (
                'input encoding is "unicode" '
                'but input is not a unicode object')
        if is_unicode:
            # Accept unicode even if self.encoding != 'unicode'.
            return data

        if self.encoding:
            # We believe the user/application when the encoding is
            # explicitly given.
            encodings = [self.encoding]
        else:
            declared = self.determine_encoding_from_data(data)
            if declared:
                # If the data declares its encoding (explicitly or via a
                # BOM), we believe it.
                encodings = [declared]
            else:
                # Apply heuristics only if no encoding is explicitly given
                # and no BOM found.  Start with UTF-8, because that only
                # matches data that *IS* UTF-8:
                encodings = ['utf-8']
                locale_probes = (
                    # for Python 2.2 compatibility
                    lambda: locale.nl_langinfo(locale.CODESET),
                    lambda: locale.getlocale()[1],
                    lambda: locale.getdefaultlocale()[1],
                    )
                for probe in locale_probes:
                    # Best effort: the module-level import of `locale` is
                    # itself allowed to fail, and any failing probe is
                    # simply skipped.
                    try:
                        encodings.append(probe())
                    except:
                        pass
                # fallback encoding:
                encodings.append('latin-1')

        error = None
        for candidate in encodings:
            if not candidate:
                # Locale probes may have contributed None/empty entries.
                continue
            try:
                decoded = unicode(data, candidate, self.error_handler)
            except (UnicodeError, LookupError):
                # Remember the most recent failure for the final report.
                error = sys.exc_info()[1]
                continue
            self.successful_encoding = candidate
            # Return decoded, removing BOMs.
            return decoded.replace(u'\ufeff', u'')

        if error is None:
            error_details = ''
        else:
            error_details = '\n(%s: %s)' % (error.__class__.__name__, error)
        tried = ', '.join([repr(name) for name in encodings if name])
        raise UnicodeError(
            'Unable to decode input data. Tried the following encodings: '
            '%s.%s'
            % (tried, error_details))

    coding_slug = re.compile(r"coding[:=]\s*([-\w.]+)")
    """Encoding declaration pattern."""

    byte_order_marks = (('\xef\xbb\xbf', 'utf-8'),
                        ('\xfe\xff', 'utf-16-be'),
                        ('\xff\xfe', 'utf-16-le'),)
    """Sequence of (start_bytes, encoding) tuples for encoding detection.
    The first bytes of input data are checked against the start_bytes strings.
    A match indicates the given encoding."""

    def determine_encoding_from_data(self, data):
        """
        Try to determine the encoding of `data` by looking *in* `data`.
        Check for a byte order mark (BOM) or an encoding declaration.

        Return the encoding name, or None if neither is found.
        """
        # check for a byte order mark:
        for bom, bom_encoding in self.byte_order_marks:
            if data.startswith(bom):
                return bom_encoding
        # check for an encoding declaration pattern in first 2 lines of file:
        for header_line in data.splitlines()[:2]:
            declaration = self.coding_slug.search(header_line)
            if declaration:
                return declaration.group(1)
        return None
class Output(TransformSpec):

    """
    Abstract base class for output wrappers.
    """

    component_type = 'output'

    default_destination_path = None

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict'):
        self.encoding = encoding
        """Text encoding for the output destination."""

        self.error_handler = error_handler or 'strict'
        """Text encoding error handler."""

        self.destination = destination
        """The destination for output data."""

        # A missing or empty path falls back to the class-level default.
        self.destination_path = (destination_path
                                 or self.default_destination_path)
        """A text reference to the destination."""

    def __repr__(self):
        fields = (self.__class__, self.destination, self.destination_path)
        return '%s: destination=%r, destination_path=%r' % fields

    def write(self, data):
        """`data` is a Unicode string, to be encoded by `self.encode`."""
        raise NotImplementedError

    def encode(self, data):
        """
        Return `data` encoded with `self.encoding` and `self.error_handler`.

        `data` is returned unchanged when the encoding is "unicode" or when
        `data` is not a Unicode string.
        """
        is_unicode = isinstance(data, UnicodeType)
        if self.encoding and self.encoding.lower() == 'unicode':
            assert is_unicode, (
                'the encoding given is "unicode" but the output is not '
                'a Unicode string')
            return data
        if not is_unicode:
            # Non-unicode (e.g. binary) output.
            return data
        try:
            return data.encode(self.encoding, self.error_handler)
        except (LookupError, ValueError):
            # LookupError is raised if there are unencodable chars
            # in data and the error_handler isn't found.  In old
            # Python versions, ValueError is raised.
            if self.error_handler != 'xmlcharrefreplace':
                raise
            # We are using xmlcharrefreplace with a Python
            # version that doesn't support it (2.1, 2.2, or
            # IronPython 1.0) so we emulate its behavior.
            pieces = [self.xmlcharref_encode(char) for char in data]
            return ''.join(pieces)

    def xmlcharref_encode(self, char):
        """Emulate Python 2.3's 'xmlcharrefreplace' encoding error handler."""
        try:
            encoded = char.encode(self.encoding, 'strict')
        except UnicodeError:
            # Unencodable: substitute a numeric character reference.
            encoded = '&#%i;' % ord(char)
        return encoded
class FileInput(Input):

    """
    Input for single, simple file-like objects.
    """

    def __init__(self, source=None, source_path=None,
                 encoding=None, error_handler='strict',
                 autoclose=1, handle_io_errors=1):
        """
        :Parameters:
            - `source`: either a file-like object (which is read directly), or
              `None` (which implies `sys.stdin` if no `source_path` given).
            - `source_path`: a path to a file, which is opened and then read.
            - `encoding`: the expected text encoding of the input file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after read (boolean); always
              false if `sys.stdin` is the source.
            - `handle_io_errors`: summarize I/O errors here, and exit?
        """
        Input.__init__(self, source, source_path, encoding, error_handler)
        self.autoclose = autoclose
        self.handle_io_errors = handle_io_errors
        if source is None:
            if source_path:
                try:
                    self.source = open(source_path)
                except IOError:
                    if not handle_io_errors:
                        raise
                    error = sys.exc_info()[1]
                    summary = '%s: %s' % (error.__class__.__name__, error)
                    notice = (
                        'Unable to open source file for reading (%r). Exiting.'
                        % source_path)
                    sys.stderr.write(summary + '\n')
                    sys.stderr.write(notice + '\n')
                    sys.exit(1)
            else:
                self.source = sys.stdin
                self.autoclose = None
        if not source_path:
            # Borrow the name of the file-like object, if it has one.
            try:
                self.source_path = self.source.name
            except AttributeError:
                pass

    def read(self):
        """
        Read and decode a single file and return the data (Unicode string).
        """
        try:
            data = self.source.read()
        finally:
            # Close even if reading failed; `close` leaves `sys.stdin` open.
            if self.autoclose:
                self.close()
        return self.decode(data)

    def close(self):
        """
        Close the source, unless it is `sys.stdin`.

        `sys.stdin` may also arrive as an explicit `source` argument, in
        which case `autoclose` keeps its caller-supplied value; guarding
        here keeps the process's standard input open in every case.
        """
        if self.source is not sys.stdin:
            self.source.close()
class FileOutput(Output):

    """
    Output for single, simple file-like objects.
    """

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict', autoclose=1,
                 handle_io_errors=1):
        """
        :Parameters:
            - `destination`: either a file-like object (which is written
              directly) or `None` (which implies `sys.stdout` if no
              `destination_path` given).
            - `destination_path`: a path to a file, which is opened and then
              written.
            - `encoding`: the text encoding used by `encode`.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after write (boolean); always
              false if `sys.stdout` is the destination.
            - `handle_io_errors`: if true, a failure to open the destination
              is reported on `sys.stderr` and the process exits; otherwise
              the `IOError` propagates.
        """
        Output.__init__(self, destination, destination_path,
                        encoding, error_handler)
        self.opened = 1
        self.autoclose = autoclose
        self.handle_io_errors = handle_io_errors
        if destination is None:
            if destination_path:
                # Opening is deferred until the first `write`.
                self.opened = None
            else:
                self.destination = sys.stdout
                self.autoclose = None
        if not destination_path:
            # Borrow the name of the file-like object, if it has one.
            try:
                self.destination_path = self.destination.name
            except AttributeError:
                pass

    def open(self):
        """Open `self.destination_path` for writing and mark it opened."""
        try:
            self.destination = open(self.destination_path, 'w')
        except IOError:
            if not self.handle_io_errors:
                raise
            error = sys.exc_info()[1]
            summary = '%s: %s' % (error.__class__.__name__, error)
            notice = ('Unable to open destination file for writing '
                      '(%r). Exiting.' % self.destination_path)
            sys.stderr.write(summary + '\n')
            sys.stderr.write(notice + '\n')
            sys.exit(1)
        self.opened = 1

    def write(self, data):
        """Encode `data`, write it to a single file, and return it."""
        output = self.encode(data)
        if not self.opened:
            self.open()
        try:
            self.destination.write(output)
        finally:
            # Close even if writing failed; `close` leaves the standard
            # streams open.
            if self.autoclose:
                self.close()
        return output

    def close(self):
        """
        Close the destination, unless it is `sys.stdout` or `sys.stderr`.

        A standard stream may also arrive as an explicit `destination`
        argument, in which case `autoclose` keeps its caller-supplied
        value.  Such a stream is left open and still counts as opened, so
        a later `write` does not try to open a file named after it.
        """
        if (self.destination is not sys.stdout
            and self.destination is not sys.stderr):
            self.destination.close()
            self.opened = None
class StringInput(Input):

    """
    Direct string input.
    """

    default_source_path = '<string>'

    def read(self):
        """Decode and return the source string."""
        raw = self.source
        return self.decode(raw)
class StringOutput(Output):

    """
    Direct string output.
    """

    default_destination_path = '<string>'

    def write(self, data):
        """Encode `data`, store it in `self.destination`, and return it."""
        encoded = self.encode(data)
        self.destination = encoded
        return self.destination
class NullInput(Input):

    """
    Degenerate input: read nothing.
    """

    default_source_path = 'null input'

    def read(self):
        """Return a null string."""
        nothing = u''
        return nothing
class NullOutput(Output):

    """
    Degenerate output: write nothing.
    """

    default_destination_path = 'null output'

    def write(self, data):
        """Do nothing ([don't even] send data to the bit bucket)."""
        # `data` is intentionally discarded.
        return None
class DocTreeInput(Input):

    """
    Adapter for document tree input.

    The document tree must be passed in the ``source`` parameter.
    """

    default_source_path = 'doctree input'

    def read(self):
        """Return the document tree."""
        # No decoding: `self.source` is handed back as-is.
        doctree = self.source
        return doctree