Updated rst.el with rst-mode.el 0.2.9
[docutils.git] / docutils / io.py
blob5a6672e151bc3c3aedd452b1a6f602f6c3adaecc
1 # Author: David Goodger
2 # Contact: goodger@users.sourceforge.net
3 # Revision: $Revision$
4 # Date: $Date$
5 # Copyright: This module has been placed in the public domain.
7 """
8 I/O classes provide a uniform API for low-level input and output. Subclasses
9 will exist for a variety of input/output mechanisms.
10 """
12 __docformat__ = 'reStructuredText'
14 import sys
15 try:
16 import locale
17 except:
18 pass
19 from types import UnicodeType
20 from docutils import TransformSpec
class Input(TransformSpec):

    """
    Abstract base class for input wrappers.

    Concrete subclasses implement `read()`; `decode()` converts the raw
    data they obtain into a Unicode string.
    """

    component_type = 'input'

    default_source_path = None

    def __init__(self, source=None, source_path=None, encoding=None,
                 error_handler='strict'):
        # Text encoding for the input source.
        self.encoding = encoding

        # Text decoding error handler.
        self.error_handler = error_handler

        # The source of input data.
        self.source = source

        # A text reference to the source; the class-level default is used
        # when no (or an empty) path is supplied.
        self.source_path = source_path or self.default_source_path

        # The encoding that successfully decoded the source data.
        self.successful_encoding = None

    def __repr__(self):
        parts = (self.__class__, self.source, self.source_path)
        return '%s: source=%r, source_path=%r' % parts

    def read(self):
        """Must be overridden by subclasses."""
        raise NotImplementedError

    def decode(self, data):
        """
        Decode a string, `data`, heuristically.
        Raise UnicodeError if unsuccessful.

        Unicode input is returned unchanged.  If `self.encoding` is set,
        only that encoding is tried; otherwise UTF-8, the encodings
        reported by the ``locale`` module, and Latin-1 are tried in that
        order.  The first encoding that works is recorded in
        `self.successful_encoding`, and every U+FEFF character is removed
        from the result.

        The client application should call ``locale.setlocale`` at the
        beginning of processing::

            locale.setlocale(locale.LC_ALL, '')
        """
        wants_unicode = self.encoding and self.encoding.lower() == 'unicode'
        if wants_unicode:
            assert isinstance(data, UnicodeType), (
                'input encoding is "unicode" '
                'but input is not a unicode object')
        if isinstance(data, UnicodeType):
            # Accept unicode even if self.encoding != 'unicode'.
            return data

        candidates = [self.encoding]
        if not self.encoding:
            # Apply heuristics only if no encoding is explicitly given.
            candidates.append('utf-8')
            locale_probes = (
                lambda: locale.nl_langinfo(locale.CODESET),
                lambda: locale.getlocale()[1],
                lambda: locale.getdefaultlocale()[1],
            )
            for probe in locale_probes:
                # Best effort: ``locale`` may be missing entirely (its
                # import is optional) or lack the queried feature.
                try:
                    candidates.append(probe())
                except:
                    pass
            candidates.append('latin-1')

        last_error = None
        for codec in candidates:
            if not codec:
                # Skip unset entries (None or empty string).
                continue
            try:
                text = unicode(data, codec, self.error_handler)
            except (UnicodeError, LookupError):
                # Remember the most recent failure for the final report.
                last_error = sys.exc_info()[1]
                continue
            self.successful_encoding = codec
            # Return decoded, removing BOMs.
            return text.replace(u'\ufeff', u'')

        if last_error is None:
            details = ''
        else:
            details = '\n(%s: %s)' % (last_error.__class__.__name__,
                                      last_error)
        tried = ', '.join([repr(codec) for codec in candidates if codec])
        raise UnicodeError(
            'Unable to decode input data. Tried the following encodings: '
            '%s.%s'
            % (tried, details))
class Output(TransformSpec):

    """
    Abstract base class for output wrappers.

    Concrete subclasses implement `write()`; `encode()` converts Unicode
    data into the configured output encoding.
    """

    component_type = 'output'

    default_destination_path = None

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict'):
        # Text encoding for the output destination.
        self.encoding = encoding

        # Text encoding error handler; a false value means 'strict'.
        self.error_handler = error_handler or 'strict'

        # The destination for output data.
        self.destination = destination

        # A text reference to the destination; the class-level default is
        # used when no (or an empty) path is supplied.
        self.destination_path = (destination_path
                                 or self.default_destination_path)

    def __repr__(self):
        parts = (self.__class__, self.destination, self.destination_path)
        return '%s: destination=%r, destination_path=%r' % parts

    def write(self, data):
        """
        Must be overridden by subclasses.

        `data` is a Unicode string, to be encoded by `self.encode`.
        """
        raise NotImplementedError

    def encode(self, data):
        """
        Return `data` encoded with `self.encoding` and `self.error_handler`.

        Data is returned unchanged when the encoding is "unicode" (the
        data must then be a Unicode string) or when the data is not a
        Unicode string at all.
        """
        if self.encoding and self.encoding.lower() == 'unicode':
            assert isinstance(data, UnicodeType), (
                'the encoding given is "unicode" but the output is not '
                'a Unicode string')
            return data
        if not isinstance(data, UnicodeType):
            # Non-unicode (e.g. binary) output.
            return data
        try:
            return data.encode(self.encoding, self.error_handler)
        except ValueError:
            # ValueError is raised if there are unencodable chars
            # in data and the error_handler isn't found.
            if self.error_handler != 'xmlcharrefreplace':
                raise
            # We are using xmlcharrefreplace with a Python version that
            # doesn't support it (2.1 or 2.2), so we emulate its behavior
            # one character at a time.
            return ''.join(map(self.xmlcharref_encode, data))

    def xmlcharref_encode(self, char):
        """Emulate Python 2.3's 'xmlcharrefreplace' encoding error handler."""
        try:
            encoded = char.encode(self.encoding, 'strict')
        except UnicodeError:
            # Unencodable: fall back to a numeric character reference.
            encoded = '&#%i;' % ord(char)
        return encoded
class FileInput(Input):

    """
    Input for single, simple file-like objects.
    """

    def __init__(self, source=None, source_path=None,
                 encoding=None, error_handler='strict',
                 autoclose=1, handle_io_errors=1):
        """
        :Parameters:
            - `source`: either a file-like object (which is read directly), or
              `None` (which implies `sys.stdin` if no `source_path` given).
            - `source_path`: a path to a file, which is opened and then read.
            - `encoding`: the expected text encoding of the input file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after read (boolean); always
              false if `sys.stdin` is the source.
            - `handle_io_errors`: if true, an `IOError` raised while opening
              `source_path` is summarized on `sys.stderr` and the process
              exits with status 1; if false, the `IOError` propagates.
        """
        Input.__init__(self, source, source_path, encoding, error_handler)
        self.autoclose = autoclose
        self.handle_io_errors = handle_io_errors
        if source is None:
            if source_path:
                try:
                    self.source = open(source_path)
                except IOError:
                    if not handle_io_errors:
                        raise
                    error = sys.exc_info()[1]
                    sys.stderr.write(
                        '%s: %s\n' % (error.__class__.__name__, error))
                    sys.stderr.write(
                        'Unable to open source file for reading (%r). '
                        'Exiting.\n' % (source_path,))
                    sys.exit(1)
            else:
                self.source = sys.stdin
        if self.source is sys.stdin:
            # Never auto-close standard input, whether it was selected
            # implicitly (no source, no path) or passed in explicitly.
            self.autoclose = None
        if not source_path:
            # Fall back to the file object's own name, when it has one.
            try:
                self.source_path = self.source.name
            except AttributeError:
                pass

    def read(self):
        """
        Read and decode a single file and return the data (Unicode string).

        The source is closed afterwards if `self.autoclose` is true, even
        when reading raises an exception.
        """
        try:
            data = self.source.read()
        finally:
            if self.autoclose:
                self.close()
        return self.decode(data)

    def close(self):
        """Close the underlying source object."""
        self.source.close()
class FileOutput(Output):

    """
    Output for single, simple file-like objects.
    """

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict', autoclose=1,
                 handle_io_errors=1):
        """
        :Parameters:
            - `destination`: either a file-like object (which is written
              directly) or `None` (which implies `sys.stdout` if no
              `destination_path` given).
            - `destination_path`: a path to a file, which is opened and then
              written.
            - `encoding`: the text encoding of the output.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after write (boolean); always
              false if `sys.stdout` is the destination.
            - `handle_io_errors`: if true, an `IOError` raised while opening
              `destination_path` is summarized on `sys.stderr` and the
              process exits with status 1; if false, the `IOError`
              propagates.
        """
        Output.__init__(self, destination, destination_path,
                        encoding, error_handler)
        # True once a usable destination object exists.  When only a path
        # is given, opening is deferred until the first `write()`.
        self.opened = 1
        self.autoclose = autoclose
        self.handle_io_errors = handle_io_errors
        if destination is None:
            if destination_path:
                self.opened = None
            else:
                self.destination = sys.stdout
        if self.destination is sys.stdout:
            # Never auto-close standard output, whether it was selected
            # implicitly (no destination, no path) or passed in explicitly.
            self.autoclose = None
        if not destination_path:
            # Fall back to the file object's own name, when it has one.
            try:
                self.destination_path = self.destination.name
            except AttributeError:
                pass

    def open(self):
        """
        Open `self.destination_path` for writing and mark it as opened.

        On `IOError`, either re-raise (if `self.handle_io_errors` is
        false) or report the error on `sys.stderr` and exit with status 1.
        """
        try:
            self.destination = open(self.destination_path, 'w')
        except IOError:
            if not self.handle_io_errors:
                raise
            error = sys.exc_info()[1]
            sys.stderr.write('%s: %s\n' % (error.__class__.__name__, error))
            sys.stderr.write('Unable to open destination file for writing '
                             '(%r). Exiting.\n' % (self.destination_path,))
            sys.exit(1)
        self.opened = 1

    def write(self, data):
        """Encode `data`, write it to a single file, and return it."""
        output = self.encode(data)
        if not self.opened:
            self.open()
        try:
            self.destination.write(output)
        finally:
            # Close even if the write failed, when auto-closing is enabled.
            if self.autoclose:
                self.close()
        return output

    def close(self):
        """Close the destination object and mark it as not opened."""
        self.destination.close()
        self.opened = None
class StringInput(Input):

    """
    Direct string input.

    The string to process is held in `self.source`.
    """

    default_source_path = '<string>'

    def read(self):
        """Return the source string, decoded to Unicode."""
        raw = self.source
        return self.decode(raw)
class StringOutput(Output):

    """
    Direct string output.

    The encoded result is kept in `self.destination`.
    """

    default_destination_path = '<string>'

    def write(self, data):
        """Encode `data`, store it in `self.destination`, and return it."""
        encoded = self.encode(data)
        self.destination = encoded
        return encoded
class NullInput(Input):

    """
    Degenerate input: read nothing.
    """

    default_source_path = 'null input'

    def read(self):
        """Return an empty Unicode string; the source is never consulted."""
        nothing = u''
        return nothing
class NullOutput(Output):

    """
    Degenerate output: write nothing.
    """

    default_destination_path = 'null output'

    def write(self, data):
        """Discard `data` without encoding or storing it; return None."""
        return None
class DocTreeInput(Input):

    """
    Adapter for document tree input.

    The document tree must be passed in the ``source`` parameter.
    """

    default_source_path = 'doctree input'

    def read(self):
        """Return the document tree unchanged (no decoding is applied)."""
        doctree = self.source
        return doctree