Spelling fixes
[docutils.git] / docutils / writers / docutils_xml.py
blob e4a44fb5ce5f153ec4742e16ed18236ee7905c25
1 # $Id$
2 # Author: David Goodger, Paul Tremblay, Guenter Milde
3 # Maintainer: docutils-develop@lists.sourceforge.net
4 # Copyright: This module has been placed in the public domain.
6 """
7 Simple document tree Writer, writes Docutils XML according to
8 http://docutils.sourceforge.net/docs/ref/docutils.dtd.
9 """
11 __docformat__ = 'reStructuredText'
13 import sys
15 # Work around broken PyXML and obsolete python stdlib behaviour. (The stdlib
16 # replaces its own xml module with PyXML if the latter is installed. However,
17 # PyXML is no longer maintained and partially incompatible/buggy.) Reverse
18 # the order in which xml module and submodules are searched to import stdlib
19 # modules if they exist and PyXML modules if they do not exist in the stdlib.
21 # See http://sourceforge.net/tracker/index.php?func=detail&aid=3552403&group_id=38414&atid=422030
22 # and http://lists.fedoraproject.org/pipermail/python-devel/2012-July/000406.html
23 import xml
24 if "_xmlplus" in xml.__path__[0]: # PyXML sub-module
25 xml.__path__.reverse() # If both are available, prefer stdlib over PyXML
27 import xml.sax.saxutils
28 from StringIO import StringIO
30 import docutils
31 from docutils import frontend, writers, nodes
class RawXmlError(docutils.ApplicationError):
    """Exception type for raw XML problems.

    Subclasses `docutils.ApplicationError` without adding any behaviour.
    """
class Writer(writers.Writer):

    """Writer that serializes the document tree as Docutils XML."""

    supported = ('xml',)
    """Formats this writer supports."""

    # Configuration file section of this writer and the sections it
    # depends on.
    config_section = 'docutils_xml writer'
    config_section_dependencies = ('writers',)

    # Characters the output encoding cannot represent are written as XML
    # character references.
    settings_defaults = {'output_encoding_error_handler': 'xmlcharrefreplace'}

    # (title, description, option specifications); every option spec is
    # a (help text, option strings, optparse keyword arguments) triple.
    settings_spec = (
        '"Docutils XML" Writer Options',
        None,
        (
            (
                'Generate XML with newlines before and after tags.',
                ['--newlines'],
                {
                    'action': 'store_true',
                    'validator': frontend.validate_boolean,
                },
            ),
            (
                'Generate XML with indents and newlines.',
                ['--indents'],
                {
                    'action': 'store_true',
                    'validator': frontend.validate_boolean,
                },
            ),
            (
                'Omit the XML declaration. Use with caution.',
                ['--no-xml-declaration'],
                {
                    'dest': 'xml_declaration',
                    'default': 1,
                    'action': 'store_false',
                    'validator': frontend.validate_boolean,
                },
            ),
            (
                'Omit the DOCTYPE declaration.',
                ['--no-doctype'],
                {
                    'dest': 'doctype_declaration',
                    'default': 1,
                    'action': 'store_false',
                    'validator': frontend.validate_boolean,
                },
            ),
        ),
    )

    output = None
    """Final translated form of `document`."""

    def __init__(self):
        """Initialize the base writer and select `XMLTranslator`."""
        writers.Writer.__init__(self)
        self.translator_class = XMLTranslator

    def translate(self):
        """Walk `self.document` with a new translator; store the result.

        The translator instance is kept as `self.visitor`, and the
        fragments it collected are joined into `self.output`.
        """
        translator = self.translator_class(self.document)
        self.visitor = translator
        self.document.walkabout(translator)
        fragments = translator.output
        self.output = ''.join(fragments)
class XMLTranslator(nodes.GenericNodeVisitor):

    """
    Node visitor that serializes a document tree as Docutils XML.

    The generated text fragments are collected in the list `self.output`.
    Raw nodes whose format includes "xml" are copied to the output
    verbatim and additionally checked with a SAX parser; a parse error is
    reported as a warning.
    """

    xml_declaration = '<?xml version="1.0" encoding="%s"?>\n'
    # TODO: add stylesheet options similar to HTML and LaTeX writers?
    #xml_stylesheet = '<?xml-stylesheet type="text/xsl" href="%s"?>\n'
    doctype = (
        '<!DOCTYPE document PUBLIC'
        ' "+//IDN docutils.sourceforge.net//DTD Docutils Generic//EN//XML"'
        ' "http://docutils.sourceforge.net/docs/ref/docutils.dtd">\n')
    generator = '<!-- Generated by Docutils %s -->\n'

    # The parser is a class attribute and therefore shared by all
    # translator instances; `__init__` installs a new content handler on it.
    xmlparser = xml.sax.make_parser()
    """SAX parser instance to check/extract raw XML."""
    # NOTE(review): with this feature enabled the parser processes external
    # general entities referenced by the raw XML it checks -- confirm that
    # this is intended when documents come from untrusted sources.
    xmlparser.setFeature(
        "http://xml.org/sax/features/external-general-entities", True)

    def __init__(self, document):
        """Set up reporter shortcuts, layout settings and the output list.

        `document` is the document tree to translate; its `settings` and
        `reporter` attributes are read here.
        """
        nodes.NodeVisitor.__init__(self, document)

        # Reporter
        self.warn = self.document.reporter.warning
        self.error = self.document.reporter.error

        # Settings
        self.settings = settings = document.settings
        self.indent = self.newline = ''
        if settings.newlines:
            self.newline = '\n'
        if settings.indents:
            # indentation implies newlines
            self.newline = '\n'
            self.indent = ' '
        self.level = 0  # indentation level
        self.in_simple = 0  # level of nesting inside mixed-content elements

        # Output
        self.output = []
        if settings.xml_declaration:
            self.output.append(
                self.xml_declaration % settings.output_encoding)
        if settings.doctype_declaration:
            self.output.append(self.doctype)
        self.output.append(self.generator % docutils.__version__)

        # initialize XML parser
        self.the_handle = TestXml()
        self.xmlparser.setContentHandler(self.the_handle)

    # generic visit and depart methods
    # --------------------------------

    def default_visit(self, node):
        """Default node visit method.

        Append the start tag of `node`.  Indentation before the tag and a
        newline after it are only written outside of `nodes.TextElement`
        content, so that no whitespace is added to mixed content.
        """
        if not self.in_simple:
            self.output.append(self.indent*self.level)
        self.output.append(node.starttag(xml.sax.saxutils.quoteattr))
        self.level += 1
        if isinstance(node, nodes.TextElement):
            # counted before the newline test: no newline after this tag
            self.in_simple += 1
        if not self.in_simple:
            self.output.append(self.newline)

    def default_departure(self, node):
        """Default node depart method.

        Append the end tag of `node`, mirroring the whitespace handling of
        `default_visit`.
        """
        self.level -= 1
        if not self.in_simple:
            self.output.append(self.indent*self.level)
        self.output.append(node.endtag())
        if isinstance(node, nodes.TextElement):
            # left before the newline test: newline after the outermost
            # text element's end tag
            self.in_simple -= 1
        if not self.in_simple:
            self.output.append(self.newline)

    # specific visit and depart methods
    # ---------------------------------

    def visit_Text(self, node):
        """Append the text of `node` with XML special characters escaped."""
        text = xml.sax.saxutils.escape(node.astext())
        self.output.append(text)

    def depart_Text(self, node):
        """Nothing to do: the text was written by `visit_Text`."""
        pass

    def visit_raw(self, node):
        """Write a raw node; pass XML content through and check it.

        Raw nodes of other formats are handled like any other element.
        For raw XML the complete element is written here, so
        `nodes.SkipNode` is always raised at the end to stop the tree
        walker from processing the node any further.  If the SAX parser
        rejects the content, a warning is reported; the content is written
        nevertheless.
        """
        if 'xml' not in node.get('format', '').split():
            # skip other raw content?
            # raise nodes.SkipNode
            self.default_visit(node)
            return
        # wrap in <raw> element
        self.default_visit(node)  # or not?
        xml_string = node.astext()
        self.output.append(xml_string)
        self.default_departure(node)  # or not?
        # Check validity of raw XML:
        # (Test the version first: the name `unicode` is only looked up
        # where it is known to exist.)
        if sys.version_info < (3,) and isinstance(xml_string, unicode):
            xml_string = xml_string.encode('utf8')
        try:
            self.xmlparser.parse(StringIO(xml_string))
        except xml.sax.SAXParseException:
            # The exception object is not needed: the position is taken
            # from the locator stored by the content handler.
            col_num = self.the_handle.locator.getColumnNumber()
            line_num = self.the_handle.locator.getLineNumber()
            # NOTE(review): assumes `node.line` is an integer -- confirm
            # for nodes created without source information.
            srcline = node.line
            if not isinstance(node.parent, nodes.TextElement):
                srcline += 2  # directive content start line
            msg = 'Invalid raw XML in column %d, line offset %d:\n%s' % (
                col_num, line_num, node.astext())
            self.warn(msg, source=node.source, line=srcline+line_num-1)
        raise nodes.SkipNode  # content already processed
class TestXml(xml.sax.ContentHandler):

    """SAX content handler that only records the document locator."""

    def setDocumentLocator(self, locator):
        """Store `locator` as `self.locator` for later position queries."""
        self.locator = locator