1 import os
, re
, sys
, codecs
3 import xml
.sax
.saxutils
6 # turn on this line if you want to disable namespaces
7 ##from xml.sax.handler import feature_namespaces
9 ##import paul.restructure_tools.handle_inline_text
10 import docutils_nest
.inline_to_xml
11 # # import txt_to_xml.brackets_to_xml
18 :author: Paul Tremblay
26 :revremark: When I first started writing this document.
32 :revremark: Turned off namespaces. Tried to get rid of using the "import ..
33 from" construction, since I don't understand it.
39 :revremark: Documented the module. Escaped brackets such as \[. Escaped any
40 bracket in an attribute.
46 :revremark: The user can now choose his or her own way to define groups.
52 This module gets a file and changes brackets to inline tags:
56 <paragraph>Text [:word1 word2 word3: Text [:word1 word2: Text2 [regular bracket]]Text3]
64 <paragraph>Text <inline arg1="word1" arg2="word2" arg3="word3"> Text <inline arg1="word1" arg2="word2"> Text2 [regular bracket]</inline>Text3</inline>
74 class InlineHandler(xml
.sax
.saxutils
.DefaultHandler
):
77 Class for handling the XML file. SAX uses the methods in this class to
90 warning
= 'problematic'
99 write_obj -- a write object
108 Set the necessary parameters:
110 self.__write_obj --the object for outputting text
112 self.__name --name of current element
114 self.__character --string containing characters in current
117 self.__in_block --whether the text is in a block element.
119 self.__block_tags -- a list of tags that include blocks of
120 text. In other words, all tags except those tags that surround
123 self.__block_tag_string -- the string of all text and elements in
126 self.__handle_br_text_obj -- an object to handle all block text
127 that contains an open bracket.
130 self
.__write
_obj
= write_obj
132 self
.__character
= ''
134 self
.__block
_tags
= ['paragraph', 'author', 'date', 'revision',
136 self
.__block
_tag
_string
= ''
137 # self.__handle_br_text_obj = \
138 # rst_bracket_inline.handle_bracket_string.Inline()
139 self
.__handle
_br
_text
_obj
= \
140 docutils_nest
.inline_to_xml
.InlineToXML(
141 start_role
= start_role
,
143 start_group
= start_group
,
144 end_group
= end_group
,
def startElement(self, name, attrs):
    """
    Handle an opening tag reported by the SAX driver.

    Requires:

        name -- the name of the element that is starting

        attrs -- a mapping of attribute names to attribute values, as
        delivered by SAX (entities already decoded)

    Returns:

        Nothing. Either extends the pending block string or writes to
        the output object.

    Logic:

        Remember the element name so that characters() can tell whether
        the text it receives belongs to a literal element, and note when
        a block element begins.

        Rebuild the opening tag as a string. Attribute values are
        XML-escaped first, because the parser hands them over decoded
        and writing them back raw would produce malformed XML for values
        containing an ampersand, an angle bracket or a double quote.

        Inside a block element, the opening and closing brackets in an
        attribute value are swapped for the two placeholder characters
        so the bracket handler leaves them alone; endElement() swaps
        them back.

        If you are in a block element, add the tag to the block element
        string. Otherwise, write the tag straight to the output.
    """
    self.__name = name
    if name in self.__block_tags:
        self.__in_block = 1

    open_tag = '<%s' % name
    for att in attrs.keys():
        value = xml.sax.saxutils.escape(attrs[att], {'"': '&quot;'})
        # kind of a kludge. Only escape text that is going to be unescaped
        # later on (that only happens for block strings, in endElement).
        if self.__in_block:
            value = value.replace('[', '\000')
            value = value.replace(']', '\001')
        open_tag += ' %s="%s"' % (att, value)
    open_tag += '>'

    if self.__in_block:
        self.__block_tag_string += open_tag
    else:
        self.__write_obj.write(open_tag)
def characters(self, character):
    """
    Handle a run of text reported by the SAX driver.

    Requires:

        character -- the text found by the parser (entities already
        decoded)

    Returns:

        Nothing. Either extends the pending block string or writes to
        the output object.

    Logic:

        Re-escape the ampersand and the angle brackets, since the parser
        delivers them decoded and the text is written back out as XML.

        If the text is not in a block (which I don't think should
        happen), simply output it to the file.

        If the text is between two literal tags, swap every opening and
        closing bracket for a placeholder character so that the bracket
        handler does not process it.

        If the text is not between two literal tags, swap a backslash
        followed by a bracket for the same placeholders, which drops the
        backslash and protects the bracket.

        (Note: The only way that a file processed by docutils-xml will
        pass on a backslash followed by a bracket is if the original
        file has *2* backslashes followed by a bracket.)

        Add the changed string to the block text string; endElement()
        turns the placeholders back into brackets.
    """
    character = xml.sax.saxutils.escape(character)

    if not self.__in_block:
        self.__write_obj.write(character)
        return

    if self.__name == 'literal':
        character = character.replace('[', '\000')
        character = character.replace(']', '\001')
    else:
        # replace escaped brackets (backslash + bracket) not in literal
        character = character.replace('\\[', '\000')
        character = character.replace('\\]', '\001')
    self.__block_tag_string += character
def endElement(self, name):
    """
    Handle a closing tag reported by the SAX driver.

    Requires:

        name -- the name of the element that is ending

    Returns:

        Nothing. Either extends the pending block string or writes to
        the output object.

    Logic:

        If the name is a block element, close the block string and check
        whether it has any starting brackets. If it does, the string is
        processed with the handle bracket object and the processed text
        is what gets written.

        The text that has been escaped is now unescaped (the placeholder
        characters become brackets again), and the string is written to
        the output file. The block string is then emptied and the
        in-block flag cleared, ready for the next block.

        If the tag does not indicate the end of a block, but you are in
        a block tag, add the tag to the block string.

        If the tag has nothing to do with a block, simply output it to
        the file.
    """
    close_tag = '</%s>' % name

    if name in self.__block_tags:
        self.__block_tag_string += close_tag
        # handle all the text
        if '[' in self.__block_tag_string:
            # NOTE(review): assumes make_tags() returns the converted
            # string -- confirm against docutils_nest.inline_to_xml.
            tagged_text = self.__handle_br_text_obj.make_tags(
                self.__block_tag_string)
        else:
            tagged_text = self.__block_tag_string
        tagged_text = tagged_text.replace('\000', '[')
        tagged_text = tagged_text.replace('\001', ']')
        self.__write_obj.write(tagged_text)
        # the block is finished: later tags and text go straight to output
        self.__in_block = 0
        self.__block_tag_string = ''
    elif self.__in_block:
        self.__block_tag_string += close_tag
    else:
        self.__write_obj.write(close_tag)
273 class InlineBrackets
:
284 warning
= 'problematic'
294 file --file to be read
296 output --file to output to
301 Nothing. Outputs a file
305 Set up a write object.
307 Create an instance of the InlineHandler for sax to use.
309 Pass this instance to the SAX driver.
311 Use the SAX driver to handle the file.
315 self
.__output
= output
317 self
.__start
_role
= start_role
318 self
.__end
_role
= end_role
319 self
.__start
_group
= start_group
320 self
.__end
_group
= end_group
322 self
.__tag
_name
= tag_name
323 self
.__warning
= warning
326 (utf8_encode
, utf8_decode
, utf8_reader
, utf8_writer
) = codecs
.lookup("utf-8")
327 write_obj
= utf8_writer(open(self
.__output
, 'w'))
328 parser
= xml
.sax
.make_parser()
329 # turn on this line if you want to disable namespaces
330 ##parser.setFeature(feature_namespaces, 0)
331 inline_handler
= InlineHandler( write_obj
= write_obj
,
332 start_role
= self
.__start
_role
,
333 end_role
= self
.__end
_role
,
334 start_group
= self
.__start
_group
,
335 end_group
= self
.__end
_group
,
336 place
= self
.__place
,
337 tag_name
= self
.__tag
_name
,
338 warning
= self
.__warning
340 parser
.setContentHandler(inline_handler
)
341 parser
.parse(self
.__file
)
346 if __name__
== '__main__':
347 file = '/home/paul/lib/python/paul/restructure_tools/test_inline.xml'
348 output
= '/home/paul/paultemp/brackets_to_tags.temp.xml'
349 obj
= InlineBrackets(file, output
= output
, warning
= '5fuck:? yo&<>u')
351 command
= 'xmlvalid -c -v %s' % output