removed obsolete issues (many of them fixed with AE)
[docutils.git] / sandbox / paultremblay / docutils_nest / docutils_nest / nested_inline.py
blob7937f070e5a29caa7a7ece9436c2854ac3df8941
1 import os, re, sys, codecs
3 import xml.sax.saxutils
4 import xml.sax
6 # turn on this line if you want to disable namespaces
7 ##from xml.sax.handler import feature_namespaces
9 ##import paul.restructure_tools.handle_inline_text
10 import docutils_nest.inline_to_xml
11 # # import txt_to_xml.brackets_to_xml
13 """
15 Inline
16 ^^^^^^
18 :author: Paul Tremblay
20 :revision:
22 :revnumber: .2
24 :date: 2003-05-03
26 :revremark: When I first started writing this document.
28 :revnumber: .21
30 :date: 2003-05-06
32 :revremark: Turned off namespaces. Tried to get rid of the "import ..
33 from" construction, since I don't understand it.
35 :revnumber: .22
37 :date: 2003-05-09
39 :revremark: Documented the module. Escaped brackets such as \[. Escaped any
40 bracket in an attribute.
42 :revnumber: .23
44 :date: 2003-05-31
46 :revremark: The user can now choose his or her own way to define groups.
48 ========
49 Overview
50 ========
52 This module gets a file and changes brackets to inline tags:
54 <document>
56 <paragraph>Text [:word1 word2 word3: Text [:word1 word2: Text2 [regular bracket]]Text3]
57 </paragraph>
59 </document>
62 <document>
64 <paragraph>Text <inline arg1="word1" arg2="word2" arg3="word3"> Text <inline arg1="word1" arg2="word2"> Text2 [regular bracket]</inline>Text3</inline>
65 </paragraph>
67 </document>
69 """
class InlineHandler(xml.sax.ContentHandler):
    """

    Class for handling the XML file. SAX uses the methods in this class to
    handle the data.

    Text and tags that occur inside a "block" element (see
    ``self.__block_tags``) are collected into one string. When the block
    ends, the string is handed to the InlineToXML object if it contains a
    group-opening delimiter, and the result is written to the output object.
    Everything outside a block is written straight through.

    """

    def __init__(self,
                 write_obj,
                 start_role=':',
                 end_role=':',
                 start_group='[',
                 end_group=']',
                 place='inside',
                 tag_name='inline',
                 warning='problematic'):
        """

        Requires:

            write_obj -- a write object (anything with a ``write(text)``
            method)

            start_role, end_role, start_group, end_group, place, tag_name,
            warning -- passed unchanged to
            docutils_nest.inline_to_xml.InlineToXML. start_group and
            end_group are also used by this class to escape and restore
            delimiters that must not be processed.

        Returns:

            nothing


        Logic:

            Set the necessary parameters:

            self.__write_obj --the object for outputting text

            self.__name --name of the innermost element currently open

            self.__open_elements --stack of the names of all open elements

            self.__in_block --whether the text is in a block element.

            self.__block_tags -- a list of tags that include blocks of
            text. In other words, all tags except those tags that surround
            inline items.

            self.__block_tag_string -- the string of all text and elements
            in the current block.

            self.__handle_br_text_obj -- an object to handle all block text
            that contains an open bracket.

        """
        xml.sax.ContentHandler.__init__(self)
        self.__write_obj = write_obj
        self.__name = ''
        self.__open_elements = []
        self.__in_block = 0
        self.__block_tags = ['paragraph', 'author', 'date', 'revision',
                             'version']
        self.__block_tag_string = ''
        self.__start_group = start_group
        self.__end_group = end_group
        self.__handle_br_text_obj = \
            docutils_nest.inline_to_xml.InlineToXML(
                start_role=start_role,
                end_role=end_role,
                start_group=start_group,
                end_group=end_group,
                place=place,
                tag_name=tag_name,
                warning=warning
            )

    def startElement(self, name, attrs):
        """

        Logic:

            The SAX driver uses this function when it finds a beginning tag.

            XML-escape the value of every attribute (the parser hands over
            the unescaped value, so writing it back verbatim could produce
            malformed XML).

            Inside a block, also escape all group delimiters in attribute
            values, so they won't be processed.

            Make a string from the opening tag. If you are in a block
            element, add this string to the block element string. Otherwise,
            write the string to the file.

        """
        self.__open_elements.append(name)
        self.__name = name
        if name in self.__block_tags:
            self.__in_block = 1
        open_tag = '<%s' % name
        for att in attrs.keys():
            value = xml.sax.saxutils.escape(attrs[att], {'"': '&quot;'})
            # kind of a kludge. Only escape text that is going to be
            # unescaped later on (see endElement)
            if self.__in_block:
                value = value.replace(self.__start_group, '\000')
                value = value.replace(self.__end_group, '\001')
            open_tag += ' %s="%s"' % (att, value)
        open_tag += '>'
        if self.__in_block:
            self.__block_tag_string += open_tag
        else:
            self.__write_obj.write(open_tag)

    def characters(self, character):
        """

        Logic:

            The SAX driver uses this function when it finds text.

            The text is XML-escaped first ('&', '<' and '>').

            If the text is between two literal tags, then I want to escape
            all opening and closing group delimiters so that they are not
            processed.

            If the text is not between two literal tags, I want to escape a
            backslash followed by a closing delimiter.

            (Note: The only way that a file processed by docutils-xml will
            pass on a backslash followed by a bracket is if the original
            file has *2* backslashes followed by a bracket.)

            Add the changed string to the block text string.

            If the text is not in a block (which I don't think should
            happen), it simply outputs it to the file.

        """
        character = xml.sax.saxutils.escape(character)
        if self.__in_block:
            if 'literal' in self.__open_elements:
                character = character.replace(self.__start_group, '\000')
                character = character.replace(self.__end_group, '\001')
            else:
                # NOTE(review): only the backslash-escaped *closing*
                # delimiter is handled here; presumably InlineToXML deals
                # with an escaped opening delimiter itself -- confirm.
                character = character.replace(
                    '\\' + self.__end_group, '\001')
            self.__block_tag_string += character
        else:
            self.__write_obj.write(character)

    def endElement(self, name):
        """

        Logic:

            The SAX driver uses the function when it finds an end tag. It
            passes to this function the name of the end element.

            If the name is a block element, the function checks if it has
            any group-opening delimiters. If it does, the string should be
            processed with the handle bracket module.

            The text that has been escaped is now unescaped, and the string
            is written to the output file.

            If the tag does not indicate the end of a block, but you are in
            a block tag, add the text to the block string.

            If the text has nothing to do with a block, simply output it to
            the file.

        """
        # Restore the enclosing element as the current one, so that text
        # following e.g. </literal> is no longer treated as literal text.
        if self.__open_elements:
            self.__open_elements.pop()
        if self.__open_elements:
            self.__name = self.__open_elements[-1]
        else:
            self.__name = ''

        close_tag = '</%s>' % name
        if name in self.__block_tags:
            self.__block_tag_string += close_tag
            # handle all the text
            if self.__start_group in self.__block_tag_string:
                tagged_text = self.__handle_br_text_obj.make_tags(
                    self.__block_tag_string)
            else:
                tagged_text = self.__block_tag_string
            tagged_text = tagged_text.replace('\000', self.__start_group)
            tagged_text = tagged_text.replace('\001', self.__end_group)
            self.__write_obj.write(tagged_text)
            self.__in_block = 0
            self.__block_tag_string = ''
        elif self.__in_block:
            self.__block_tag_string += close_tag
        else:
            self.__write_obj.write(close_tag)
class InlineBrackets:
    """

    Driver that reads an XML file and writes a copy in which bracketed
    inline markup has been converted to tags by InlineHandler.

    """

    def __init__(self,
                 file,
                 output,
                 start_role=':',
                 end_role=':',
                 start_group='[',
                 end_group=']',
                 place='inside',
                 tag_name='inline',
                 warning='problematic'):
        """

        Requires:

            file --file to be read

            output --file to output to

            start_role, end_role, start_group, end_group, place, tag_name,
            warning -- stored and later passed unchanged to InlineHandler.


        Returns:

            Nothing. Only stores the parameters; the work is done in
            make_tags.

        """
        self.__output = output
        self.__file = file
        self.__start_role = start_role
        self.__end_role = end_role
        self.__start_group = start_group
        self.__end_group = end_group
        self.__place = place
        self.__tag_name = tag_name
        self.__warning = warning

    def make_tags(self):
        """

        Returns:

            Nothing. Outputs a file

        Logic:

            Set up a write object.

            Create an instance of the InlineHandler for sax to use.

            Pass this instance to the SAX driver.

            Use the SAX driver to handle the file.

            Close the write object, even if parsing fails.

        """
        # The UTF-8 stream writer encodes text itself, so the underlying
        # file has to be opened in binary mode.
        write_obj = codecs.getwriter('utf-8')(open(self.__output, 'wb'))
        try:
            parser = xml.sax.make_parser()
            # turn on this line if you want to disable namespaces
            ##parser.setFeature(feature_namespaces, 0)
            inline_handler = InlineHandler(
                write_obj=write_obj,
                start_role=self.__start_role,
                end_role=self.__end_role,
                start_group=self.__start_group,
                end_group=self.__end_group,
                place=self.__place,
                tag_name=self.__tag_name,
                warning=self.__warning
            )
            parser.setContentHandler(inline_handler)
            parser.parse(self.__file)
        finally:
            write_obj.close()
if __name__ == '__main__':
    # Ad-hoc smoke test: convert one file and run an external validator on
    # the result.
    #
    # Usage: nested_inline.py [input.xml [output.xml]]
    # Without arguments, the author's original test paths are used.
    import subprocess

    file = '/home/paul/lib/python/paul/restructure_tools/test_inline.xml'
    output = '/home/paul/paultemp/brackets_to_tags.temp.xml'
    if len(sys.argv) > 1:
        file = sys.argv[1]
    if len(sys.argv) > 2:
        output = sys.argv[2]

    # The warning value is presumably malformed on purpose, to exercise how
    # special characters in the warning name are handled -- confirm.
    obj = InlineBrackets(file, output = output, warning = '5fuck:? yo&<>u')
    obj.make_tags()

    # Pass the arguments as a list so that paths containing spaces or shell
    # metacharacters reach the validator unchanged.
    try:
        subprocess.call(['xmlvalid', '-c', '-v', output])
    except OSError:
        sys.stderr.write('xmlvalid could not be run; skipping validation\n')