1 # Author: David Goodger
2 # Contact: goodger@users.sourceforge.net
5 # Copyright: This module has been placed in the public domain.
8 I/O classes provide a uniform API for low-level input and output. Subclasses
9 will exist for a variety of input/output mechanisms.
12 __docformat__
= 'reStructuredText'
19 from types
import UnicodeType
20 from docutils
import TransformSpec
23 class Input(TransformSpec
):
26 Abstract base class for input wrappers.
29 component_type
= 'input'
31 default_source_path
= None
33 def __init__(self
, source
=None, source_path
=None, encoding
=None,
34 error_handler
='strict'):
35 self
.encoding
= encoding
36 """Text encoding for the input source."""
38 self
.error_handler
= error_handler
39 """Text decoding error handler."""
42 """The source of input data."""
44 self
.source_path
= source_path
45 """A text reference to the source."""
48 self
.source_path
= self
.default_source_path
50 self
.successful_encoding
= None
51 """The encoding that successfully decoded the source data."""
54 return '%s: source=%r, source_path=%r' % (self
.__class
__, self
.source
,
58 raise NotImplementedError
60 def decode(self
, data
):
62 Decode a string, `data`, heuristically.
63 Raise UnicodeError if unsuccessful.
65 The client application should call ``locale.setlocale`` at the
66 beginning of processing::
68 locale.setlocale(locale.LC_ALL, '')
70 if self
.encoding
and self
.encoding
.lower() == 'unicode':
71 assert isinstance(data
, UnicodeType
), (
72 'input encoding is "unicode" '
73 'but input is not a unicode object')
74 if isinstance(data
, UnicodeType
):
75 # Accept unicode even if self.encoding != 'unicode'.
77 encodings
= [self
.encoding
]
79 # Apply heuristics only if no encoding is explicitly given.
80 encodings
.append('utf-8')
82 encodings
.append(locale
.nl_langinfo(locale
.CODESET
))
86 encodings
.append(locale
.getlocale()[1])
90 encodings
.append(locale
.getdefaultlocale()[1])
93 encodings
.append('latin-1')
100 decoded
= unicode(data
, enc
, self
.error_handler
)
101 self
.successful_encoding
= enc
102 # Return decoded, removing BOMs.
103 return decoded
.replace(u
'\ufeff', u
'')
104 except (UnicodeError, LookupError), error
:
106 if error
is not None:
107 error_details
= '\n(%s: %s)' % (error
.__class
__.__name
__, error
)
109 'Unable to decode input data. Tried the following encodings: '
111 % (', '.join([repr(enc
) for enc
in encodings
if enc
]),
115 class Output(TransformSpec
):
118 Abstract base class for output wrappers.
121 component_type
= 'output'
123 default_destination_path
= None
def __init__(self, destination=None, destination_path=None,
             encoding=None, error_handler='strict'):
    """
    Record where and how the output is to be written.

    - `destination`: the destination for output data.
    - `destination_path`: a text reference to the destination; a false
      value selects the class attribute `default_destination_path`.
    - `encoding`: the text encoding for the output destination.
    - `error_handler`: the text encoding error handler; a false value
      selects 'strict'.
    """
    # A false handler (None, '') falls back to strict encoding.
    handler = error_handler or 'strict'
    # A false path falls back to the class-level default.
    path = destination_path or self.default_destination_path

    self.encoding = encoding
    """Text encoding for the output destination."""

    self.error_handler = handler
    """Text encoding error handler."""

    self.destination = destination
    """The destination for output data."""

    self.destination_path = path
    """A text reference to the destination."""
143 return ('%s: destination=%r, destination_path=%r'
144 % (self
.__class
__, self
.destination
, self
.destination_path
))
def write(self, data):
    """
    Write `data`, a Unicode string to be encoded by `self.encode`.

    This base implementation is a placeholder: it always raises
    NotImplementedError.
    """
    raise NotImplementedError
150 def encode(self
, data
):
151 if self
.encoding
and self
.encoding
.lower() == 'unicode':
152 assert isinstance(data
, UnicodeType
), (
153 'the encoding given is "unicode" but the output is not '
156 if not isinstance(data
, UnicodeType
):
157 # Non-unicode (e.g. binary) output.
161 return data
.encode(self
.encoding
, self
.error_handler
)
163 # ValueError is raised if there are unencodable chars
164 # in data and the error_handler isn't found.
165 if self
.error_handler
== 'xmlcharrefreplace':
166 # We are using xmlcharrefreplace with a Python
167 # version that doesn't support it (2.1 or 2.2), so
168 # we emulate its behavior.
169 return ''.join([self
.xmlcharref_encode(char
)
174 def xmlcharref_encode(self
, char
):
175 """Emulate Python 2.3's 'xmlcharrefreplace' encoding error handler."""
177 return char
.encode(self
.encoding
, 'strict')
179 return '&#%i;' % ord(char
)
182 class FileInput(Input
):
185 Input for single, simple file-like objects.
188 def __init__(self
, source
=None, source_path
=None,
189 encoding
=None, error_handler
='strict',
190 autoclose
=1, handle_io_errors
=1):
193 - `source`: either a file-like object (which is read directly), or
194 `None` (which implies `sys.stdin` if no `source_path` given).
195 - `source_path`: a path to a file, which is opened and then read.
196 - `encoding`: the expected text encoding of the input file.
197 - `error_handler`: the encoding error handler to use.
198 - `autoclose`: close automatically after read (boolean); always
199 false if `sys.stdin` is the source.
200 - `handle_io_errors`: summarize I/O errors here, and exit?
202 Input
.__init
__(self
, source
, source_path
, encoding
, error_handler
)
203 self
.autoclose
= autoclose
204 self
.handle_io_errors
= handle_io_errors
208 self
.source
= open(source_path
)
209 except IOError, error
:
210 if not handle_io_errors
:
212 print >>sys
.stderr
, '%s: %s' % (error
.__class
__.__name
__,
214 print >>sys
.stderr
, (
215 'Unable to open source file for reading (%r). Exiting.'
219 self
.source
= sys
.stdin
220 self
.autoclose
= None
223 self
.source_path
= self
.source
.name
224 except AttributeError:
229 Read and decode a single file and return the data (Unicode string).
232 data
= self
.source
.read()
236 return self
.decode(data
)
242 class FileOutput(Output
):
245 Output for single, simple file-like objects.
248 def __init__(self
, destination
=None, destination_path
=None,
249 encoding
=None, error_handler
='strict', autoclose
=1,
253 - `destination`: either a file-like object (which is written
254 directly) or `None` (which implies `sys.stdout` if no
255 `destination_path` given).
256 - `destination_path`: a path to a file, which is opened and then
258 - `autoclose`: close automatically after write (boolean); always
259 false if `sys.stdout` is the destination.
261 Output
.__init
__(self
, destination
, destination_path
,
262 encoding
, error_handler
)
264 self
.autoclose
= autoclose
265 self
.handle_io_errors
= handle_io_errors
266 if destination
is None:
270 self
.destination
= sys
.stdout
271 self
.autoclose
= None
272 if not destination_path
:
274 self
.destination_path
= self
.destination
.name
275 except AttributeError:
280 self
.destination
= open(self
.destination_path
, 'w')
281 except IOError, error
:
282 if not self
.handle_io_errors
:
284 print >>sys
.stderr
, '%s: %s' % (error
.__class
__.__name
__,
286 print >>sys
.stderr
, ('Unable to open destination file for writing '
287 '(%r). Exiting.' % self
.destination_path
)
291 def write(self
, data
):
292 """Encode `data`, write it to a single file, and return it."""
293 output
= self
.encode(data
)
297 self
.destination
.write(output
)
304 self
.destination
.close()
308 class StringInput(Input
):
314 default_source_path
= '<string>'
317 """Decode and return the source string."""
318 return self
.decode(self
.source
)
class StringOutput(Output):

    """
    Direct string output.

    The encoded result of the most recent `write` call is kept in
    `self.destination`.
    """

    default_destination_path = '<string>'

    def write(self, data):
        """Encode `data`, keep the result in `self.destination`, return it."""
        encoded = self.encode(data)
        self.destination = encoded
        return encoded
335 class NullInput(Input
):
338 Degenerate input: read nothing.
341 default_source_path
= 'null input'
344 """Return a null string."""
class NullOutput(Output):

    """
    Degenerate output: write nothing.
    """

    default_destination_path = 'null output'

    def write(self, data):
        """Discard `data`: nothing is written and None is returned."""
        return None
361 class DocTreeInput(Input
):
364 Adapter for document tree input.
366 The document tree must be passed in the ``source`` parameter.
369 default_source_path
= 'doctree input'
372 """Return the document tree."""