# Author: David Goodger, Paul Tremblay, Guenter Milde
# Maintainer: docutils-develop@lists.sourceforge.net
# Copyright: This module has been placed in the public domain.

"""
Simple document tree Writer, writes Docutils XML according to
http://docutils.sourceforge.net/docs/ref/docutils.dtd.
"""
__docformat__ = 'reStructuredText'

# Work around broken PyXML and obsolete python stdlib behaviour. (The stdlib
# replaces its own xml module with PyXML if the latter is installed. However,
# PyXML is no longer maintained and partially incompatible/buggy.) Reverse
# the order in which xml module and submodules are searched to import stdlib
# modules if they exist and PyXML modules if they do not exist in the stdlib.
#
# See http://sourceforge.net/tracker/index.php?func=detail&aid=3552403&group_id=38414&atid=422030
# and http://lists.fedoraproject.org/pipermail/python-devel/2012-July/000406.html
#
# The package must be imported here, before any ``xml.*`` submodule is
# loaded, so that the reordered search path takes effect for them.
import xml
if "_xmlplus" in xml.__path__[0]:  # PyXML sub-module
    xml.__path__.reverse()  # If both are available, prefer stdlib over PyXML
import sys
import xml.sax.saxutils
from StringIO import StringIO

import docutils
from docutils import frontend, writers, nodes
class RawXmlError(docutils.ApplicationError):

    """Application-level error type declared by the XML writer module.

    NOTE(review): judging by the name it is meant for problems with raw XML
    content, but nothing in the visible part of this module raises it --
    confirm external users before changing or removing it.
    """
class Writer(writers.Writer):

    """Writer that serialises the document tree as Docutils XML.

    The actual rendering is delegated to `translator_class`
    (`XMLTranslator` by default); this class only declares the
    command-line/config settings and joins the translator's output.
    """

    supported = ('xml',)
    """Formats this writer supports."""

    # (title, description, option tuples); each option is
    # (help text, option strings, optparse keyword arguments).
    settings_spec = (
        '"Docutils XML" Writer Options',
        None,
        (('Generate XML with newlines before and after tags.',
          ['--newlines'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Generate XML with indents and newlines.',
          ['--indents'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Omit the XML declaration. Use with caution.',
          ['--no-xml-declaration'],
          {'dest': 'xml_declaration', 'default': 1, 'action': 'store_false',
           'validator': frontend.validate_boolean}),
         ('Omit the DOCTYPE declaration.',
          ['--no-doctype'],
          {'dest': 'doctype_declaration', 'default': 1,
           'action': 'store_false', 'validator': frontend.validate_boolean}),))

    # Characters the output encoding cannot represent are written as
    # numeric character references.
    settings_defaults = {'output_encoding_error_handler': 'xmlcharrefreplace'}

    config_section = 'docutils_xml writer'
    config_section_dependencies = ('writers',)

    output = None
    """Final translated form of `document`."""

    def __init__(self):
        """Initialise the base writer and select the translator class."""
        writers.Writer.__init__(self)
        self.translator_class = XMLTranslator

    def translate(self):
        """Walk `self.document` with a translator and store the XML string.

        The translator instance is kept in `self.visitor`; the joined
        output fragments are stored in `self.output`.
        """
        self.visitor = visitor = self.translator_class(self.document)
        self.document.walkabout(visitor)
        self.output = ''.join(visitor.output)
class XMLTranslator(nodes.GenericNodeVisitor):

    """Node visitor that renders a document tree as Docutils XML.

    Output fragments are appended to the list `self.output`; the caller
    joins them into the final document.
    """

    xml_declaration = '<?xml version="1.0" encoding="%s"?>\n'
    # TODO: add stylesheet options similar to HTML and LaTeX writers?
    #xml_stylesheet = '<?xml-stylesheet type="text/xsl" href="%s"?>\n'
    doctype = (
        '<!DOCTYPE document PUBLIC'
        ' "+//IDN docutils.sourceforge.net//DTD Docutils Generic//EN//XML"'
        ' "http://docutils.sourceforge.net/docs/ref/docutils.dtd">\n')
    generator = '<!-- Generated by Docutils %s -->\n'

    # The parser is created once, at class-definition time, and is therefore
    # shared by all translator instances.
    xmlparser = xml.sax.make_parser()
    """SAX parser instance to check/extract raw XML."""
    # NOTE(review): external general entities are enabled, so raw XML taken
    # from an untrusted document may make the parser fetch external
    # resources while it is being checked -- confirm this is intended.
    xmlparser.setFeature(
        "http://xml.org/sax/features/external-general-entities", True)

    def __init__(self, document):
        """Set up reporter shortcuts, layout settings and the output prolog.

        `document` is the document tree to translate; its `settings` provide
        `newlines`, `indents`, `xml_declaration`, `doctype_declaration` and
        `output_encoding`.
        """
        nodes.NodeVisitor.__init__(self, document)

        # Reporter
        self.warn = self.document.reporter.warning
        self.error = self.document.reporter.error

        # Settings
        self.settings = settings = document.settings
        self.indent = self.newline = ''
        if settings.newlines:
            self.newline = '\n'
        if settings.indents:
            # Indented output implies newlines as well.
            self.newline = '\n'
            self.indent = '    '
        self.level = 0  # indentation level
        self.in_simple = 0  # level of nesting inside mixed-content elements

        # Output
        self.output = []
        if settings.xml_declaration:
            self.output.append(
                self.xml_declaration % settings.output_encoding)
        if settings.doctype_declaration:
            self.output.append(self.doctype)
        self.output.append(self.generator % docutils.__version__)

        # initialize XML parser
        self.the_handle = TestXml()
        self.xmlparser.setContentHandler(self.the_handle)

    # generic visit and depart methods
    # --------------------------------

    def default_visit(self, node):
        """Default node visit method."""
        # Indentation and newlines are suppressed inside mixed-content
        # (text) elements, where extra whitespace would alter the content.
        if not self.in_simple:
            self.output.append(self.indent*self.level)
        self.output.append(node.starttag(xml.sax.saxutils.quoteattr))
        self.level += 1
        if isinstance(node, nodes.TextElement):
            self.in_simple += 1
        if not self.in_simple:
            self.output.append(self.newline)

    def default_departure(self, node):
        """Default node depart method."""
        self.level -= 1
        if not self.in_simple:
            self.output.append(self.indent*self.level)
        self.output.append(node.endtag())
        if isinstance(node, nodes.TextElement):
            self.in_simple -= 1
        if not self.in_simple:
            self.output.append(self.newline)

    # specific visit and depart methods
    # ---------------------------------

    def visit_Text(self, node):
        """Append the XML-escaped text of `node` to the output."""
        text = xml.sax.saxutils.escape(node.astext())
        self.output.append(text)

    def depart_Text(self, node):
        """Nothing to close for text nodes."""
        pass

    def visit_raw(self, node):
        """Write a raw node; check raw XML and warn if it does not parse.

        Raw content in a format other than "xml" is handled like any other
        element.  Raw XML is written verbatim inside its element and then
        parsed for validation only; a parse error is reported as a warning.

        Raises `nodes.SkipNode` for raw XML, because the complete element
        has already been written here.
        """
        if 'xml' not in node.get('format', '').split():
            # skip other raw content?
            # raise nodes.SkipNode
            self.default_visit(node)
            return
        # wrap in <raw> element
        self.default_visit(node)  # or not?
        xml_string = node.astext()
        self.output.append(xml_string)
        self.default_departure(node)  # or not?
        # Check validity of raw XML:
        # (version test first: the name ``unicode`` only exists on Python 2)
        if sys.version_info < (3,) and isinstance(xml_string, unicode):
            xml_string = xml_string.encode('utf8')
        try:
            self.xmlparser.parse(StringIO(xml_string))
        except xml.sax.SAXParseException:
            # Error position as recorded by the content handler's locator.
            col_num = self.the_handle.locator.getColumnNumber()
            line_num = self.the_handle.locator.getLineNumber()
            srcline = node.line
            if not isinstance(node.parent, nodes.TextElement):
                srcline += 2  # directive content start line
            msg = 'Invalid raw XML in column %d, line offset %d:\n%s' % (
                col_num, line_num, node.astext())
            self.warn(msg, source=node.source, line=srcline+line_num-1)
        raise nodes.SkipNode  # content already processed
class TestXml(xml.sax.ContentHandler):

    """SAX content handler that only remembers the parser's locator.

    The stored locator lets the caller ask for the current line and column
    after a parse error.
    """

    # Default so the attribute exists even before a parser has called
    # `setDocumentLocator`.
    locator = None

    def setDocumentLocator(self, locator):
        """Store the `locator` object handed over by the SAX parser."""
        self.locator = locator