Fix #338: re.sub() flag argument at wrong position.
[docutils.git] / sandbox / paultremblay / other / asciitomathml.py
blob00ac849eb0241e2c950299f7a0101045cfe192a4
1 # -*- coding: UTF-8 -*-
2 import re, sys, copy
3 from copy import deepcopy
5 from xml.etree.ElementTree import Element, tostring
6 import xml.etree.ElementTree as etree
class InvalidAsciiMath(Exception):
    """Exception type for invalid ASCII math input."""
15 class AsciiMathML:
# Greek letter names mapped to the character emitted for them.
greek_dict = {
    'alpha': u'\u03B1',
    'beta': u'\u03B2',
    'chi': u'\u03C7',
    'delta': u'\u03B4',
    'epsi': u'\u03B5',
    'varepsilon': u'\u025B',
    'eta': u'\u03B7',
    'gamma': u'\u03B3',
    'iota': u'\u03B9',
    'kappa': u'\u03BA',
    'lambda': u'\u03BB',
    'mu': u'\u03BC',
    'nu': u'\u03BD',
    'omega': u'\u03C9',
    'phi': u'\u03C6',
    'varphi': u'\u03D5',
    'pi': u'\u03C0',
    'psi': u'\u03C8',
    'Psi': u'\u03A8',
    'rho': u'\u03C1',
    'sigma': u'\u03C3',
    'tau': u'\u03C4',
    'theta': u'\u03B8',
    'vartheta': u'\u03D1',
    'Theta': u'\u0398',
    'upsilon': u'\u03C5',
    'xi': u'\u03BE',
    'zeta': u'\u03B6',
}

# Words reported by the tokenizer as 'special_text'.
# NOTE(review): the original "space = true" remark presumably refers to the
# spacing added around these words when they are written -- confirm.
text_dict = {
    'and': 'and',
    'or': 'or',
    'if': 'if',
}

# Symbols currently consist of exactly the Greek letters (independent copy).
symbol_dict = dict(greek_dict)
# Operator tokens mapped to the text written into the <mo> element.
operator_dict = {
    # named functions and limits
    'min': 'min',
    'max': 'max',
    'lim': 'lim',
    'Lim': 'Lim',
    'sin': 'sin',
    'cos': 'cos',
    'tan': 'tan',
    'sinh': 'sinh',
    'cosh': 'cosh',
    'tanh': 'tanh',
    'cot': 'cot',
    'sec': 'sec',
    'csc': 'csc',
    'log': 'log',
    'ln': 'ln',
    'det': 'det',
    'gcd': 'gcd',
    'lcm': 'lcm',
    # capital Greek letters and big operators
    'Delta': u'\u0394',
    'Gamma': u'\u0393',
    'Lambda': u'\u039B',
    'Omega': u'\u03A9',
    'Phi': u'\u03A6',
    'Pi': u'\u03A0',
    'Sigma': u'\u2211',
    'sum': u'\u2211',
    'Xi': u'\u039E',
    'prod': u'\u220f',
    '^^^': u'\u22c0',
    'vvv': u'\u22c1',
    'nnn': u'\u22c2',
    'uuu': u'\u22c3',
    # binary operators
    '*': u'\u22C5',
    '**': u'\u22C6',
    '//': u'/',
    '\\\\': u'\\',
    'setminus': u'\\',
    'xx': u'\u00D7',
    '-:': u'\u00F7',
    '@': u'\u2218',
    'o+': u'\u2295',
    'ox': u'\u2297',
    'o.': u'\u2299',
    '^^': u'\u2227',
    'vv': u'\u2228',
    'nn': u'\u2229',
    'uu': u'\u222A',
    # relations
    '!=': u'\u2260',
    ':=': u':=',
    'lt': u'<',
    '<=': u'\u2264',
    'lt=': u'\u2264',
    '>=': u'\u2265',
    'geq': u'\u2265',
    'ge': u'\u2265',
    '-<': u'\u227A',
    '-lt': u'\u227A',
    '>-': u'\u227B',
    '-<=': u'\u2AAF',
    '>-=': u'\u2AB0',
    'in': u'\u2208',
    '!in': u'\u2209',
    'sub': u'\u2282',
    'sup': u'\u2283',
    'sube': u'\u2286',
    'supe': u'\u2287',
    '-=': u'\u2261',
    '~=': u'\u2245',
    '~~': u'\u2248',
    'prop': u'\u221D',
    # logic
    'not': u'\u00AC',
    '=>': u'\u21D2',
    '<=>': u'\u21D4',
    'AA': u'\u2200',
    'EE': u'\u2203',
    '_|_': u'\u22A5',
    'TT': u'\u22A4',
    '|--': u'\u22A2',
    '|==': u'\u22A8',
    # miscellaneous symbols and spacing
    'int': u'\u222B',
    'oint': u'\u222E',
    'del': u'\u2202',
    'grad': u'\u2207',
    '+-': u'\u00B1',
    'O/': u'\u2205',
    'oo': u'\u221E',
    'aleph': u'\u2135',
    '...': u'...',
    ':.': u'\u2234',
    '/_': u'\u2220',
    '\\ ': u'\u00A0',
    'quad': u'\u00A0\u00A0',
    'qquad': u'\u00A0\u00A0\u00A0\u00A0',
    'cdots': u'\u22EF',
    'vdots': u'\u22EE',
    'ddots': u'\u22F1',
    'diamond': u'\u22C4',
    'square': u'\u25A1',
    '|__': u'\u230A',
    '__|': u'\u230B',
    '|~': u'\u2308',
    '~|': u'\u2309',
    'CC': u'\u2102',
    'NN': u'\u2115',
    'QQ': u'\u211A',
    'RR': u'\u211D',
    'ZZ': u'\u2124',
    'dim': u'dim',
    'mod': u'mod',
    'lub': u'lub',
    'glb': u'glb',
}
# 'f' and 'g' are left out because it is unclear how they should be handled.
# Tokens that need structural handling; each maps to its own
# {'type': 'special'} info dict, which the tokenizer returns as-is.
special_dict = dict(
    (name, {'type': 'special'})
    for name in (
        '(', '{', '}', ')', '[', ']',
        '/', '^', '_',
        '|', '||',
        '(:', ':)', '<<', '>>', '{:', ':}',
        'hat', 'bar', 'vec', 'dot', 'ddot', 'ul',
        'root', 'stackrel', 'frac', 'sqrt', 'text',
    )
)
# Operators whose '^' / '_' arguments are written as mover / munder.
under_over_list = [u"\u2211", u"\u220f", u"\u22c0", u"\u22c1", u"\u22c2", u"\u22c3", "min", "max", "Lim", "lim"]
# Accent-like tokens; the accent character is appended after the base.
under_over_base_last = ['hat', 'bar', 'vec', 'dot', 'ddot', 'ul']
over_list = ['hat', 'bar', 'vec', 'dot', 'ddot']
under_list = ['ul']
# The angle-bracket pair is U+2329 / U+232A, matching open_fence_list,
# close_fence_list and fence_pair below (the left bracket was previously
# misspelled as U+2239).
fence_list = ['(', ')', '{', '}', '[', ']', u'\u2329', u'\u232a', '(:', ':)', '<<', '>>', '{:', ':}']
open_fence_list = ['(', '{', '[', u'\u2329', '<<', '{:']
close_fence_list = [')', '}', ']', u'\u232A', '>>', ':}']
function_list = ['root', 'stackrel', 'frac', 'sqrt']
group_func_list = ['min', 'max', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'cot', 'sec', 'csc', 'log', 'ln', 'det', 'gcd', 'lcm']
# Closing fence -> the opening fence it matches.
fence_pair = {')': '(', '}': '{', ']': '[', u'\u232A': u'\u2329', ':}': '{:'}
# Accent token -> character written over / under the base.
over_dict = {'hat': '^', 'bar': u"\u00AF", 'vec': u"\u2192", 'dot': u".", 'ddot': u".."}
under_dict = {'ul': u"\u0332"}
# Materialize the key views as lists: on Python 3 dict.keys() returns a view
# that does not support '+'.
sym_list = list(symbol_dict.keys())
spec_name_list = list(special_dict.keys())
op_name_list = list(operator_dict.keys())
text_list = list(text_dict.keys())
# All known token names, longest first, so the tokenizer tries the longest
# candidate before any shorter name.
names = sorted(sym_list + op_name_list + spec_name_list + text_list, key=len, reverse=True)
222 def __init__(self, output_encoding = 'utf8'):
223 self._number_re = re.compile('-?(\d+\.(\d+)?|\.?\d+)')
224 self._tree = Element('math')
225 mstyle = etree.SubElement(self._tree, 'mstyle')
226 self._mathml_ns = 'http://www.w3.org/1998/Math/MathML'
227 self._append_el = mstyle
228 self._output_encoding = output_encoding
229 self._fenced_for_right = False # used fence for right fence with no match
230 self._fenced_for_left = False # used fence for left fence with no match
231 self._use_fence = True # use <mfence> for fences
233 def _add_namespace(self):
234 attributes = self._tree.attrib
235 value = attributes.get('xmlns')
236 if not value:
237 self._tree.set('xmlns', self._mathml_ns)
239 def to_xml_string(self, encoding=None):
240 if not encoding:
241 encoding = self._output_encoding
242 self._add_namespace()
243 xml_string = tostring(self._tree, encoding=encoding)
244 return xml_string
246 def get_tree(self):
247 self._add_namespace()
248 return self._tree
250 def _make_element(self, tag, text=None, *children, **attrib):
251 element = Element(tag, **attrib)
253 if not text is None:
254 if isinstance(text, basestring):
255 element.text = text
256 else:
257 children = (text, ) + children
259 for child in children:
260 element.append(child)
262 return element
264 def _change_el_name(self, element, new_name):
265 element.tag = new_name
268 def _get_previous_sibling(self, element , the_tree = 0):
270 either the previous sibling passed to the function, or if none is passed,
271 the previous sibling of the last element written
274 if the_tree == 0:
275 the_tree = self._tree
276 parent = self._get_parent(child = element, the_tree = the_tree)
277 if parent == None:
278 return
279 counter = -1
280 for child in parent:
281 counter += 1
282 if child == element:
283 if counter - 1 < 0:
284 return None
285 return parent[counter - 1]
287 def _get_last_element(self):
288 if len(self._append_el) > 0:
289 return self._append_el[-1]
290 return self._append_el
292 def _get_following_sibling(self, element, the_tree = 0):
293 if the_tree == 0:
294 the_tree = self._tree
295 parent = self._get_parent(the_tree = the_tree, child = element)
296 if parent == None:
297 return
298 counter = -1
299 for child in parent:
300 counter += 1
301 if child == element:
302 if len(parent) == counter + 1:
303 return None
304 return parent[counter + 1]
306 def _get_parent(self, child, the_tree = 0):
309 the_tree: an xml.etree of the whole tree
311 child: an xml.etree of the child element
313 There is no direct way to get the parent of an element in etree. This
314 method makes a child-parent dictionary of the whold tree, than accesses
315 the dictionary
318 if the_tree == 0:
319 the_tree = self._tree
320 child_parent_map = dict((c, p) for p in the_tree.getiterator() for c in p)
321 parent = child_parent_map.get(child)
322 return parent
324 def _get_grandparent(self, child, the_tree = 0):
325 parent = self._get_parent(child = child, the_tree = the_tree)
326 grandparent = self._get_parent(child = parent, the_tree = the_tree)
328 def _change_element(self, element, name, **attributes):
330 Changes just the top element to the name "element' with the **attributes passed to it.
333 element.tag = name
334 the_keys = element.attrib.keys()
335 for the_key in the_keys:
336 del(element.attrib[the_key])
337 for att in attributes:
338 element.set(att, attributes[att])
341 def _fix_open_fence(self, element):
343 changes <mfence open="(" close=""
345 </mfenced>
347 <mo>(</mo>
352 parent = self._get_parent(element)
353 the_open = element.get('open')
354 position = 0
355 found = False
356 for e in parent:
357 if e == element:
358 found = 1
359 break
360 position += 1
361 paren = self._make_element('mo', text = the_open)
362 parent.insert(position, paren)
363 c = 1 + position
364 for e in element:
365 parent.insert(c, e)
366 c += 1
367 parent.remove(element)
369 def _insert_mrow(self, element, class_name):
371 Inserts an mrow element around element
374 if len(element) == 1:
375 return
376 self._change_element(element, 'mrow', **{'class':class_name})
377 new_element = deepcopy(element)
378 self._append_el.remove(element)
379 parenthesis = self._make_element('mfenced', open='(', separators='', close=")")
380 parenthesis.append(new_element)
381 self._append_el.append(parenthesis)
383 def _count_commas(self, element):
385 counts commas for matrix
387 if element == None:
388 return 0
389 count = 0
390 for child in element:
391 if child.tag == 'mo' and child.text == ',':
392 count += 1
393 return count
395 def _is_matrix(self, element):
397 Tests if element is in fact a matrix
400 if len(element) < 3:
401 return
402 if not self._is_full_fenced(element):
403 return
404 counter = 0
405 row_len = None
406 for child in element:
407 if counter % 2 == 0: # even
408 if not self._is_full_fenced(child):
409 return
410 num_commas = self._count_commas(child)
411 if row_len == None:
412 row_len = num_commas
413 else:
414 if num_commas != row_len:
415 return
416 inner_counter = 0
417 else:
418 if child.tag != 'mo' or child.text != ',':
419 return
420 counter += 1
421 return True
424 def _is_full_fenced(self, element):
426 Returns True if element is a fence with matching open and close; or if open is {: or closse is :}
429 if element == None:
430 return
431 close_fence = element.get('close')
432 the_class = element.get('class')
433 open_fence = element.get('open')
434 if close_fence == u'\u232A': # don't remove these parenthesis
435 return
436 pair = self.fence_pair.get(close_fence)
437 if the_class == 'invisible':
438 pass
439 elif not pair:
440 return
441 if element.tag == 'mfenced':
442 return True
445 def _add_num_to_tree(self, token, the_type):
446 element = self._make_element('mn', text=token)
447 self._append_el.append(element)
450 def _add_text_to_tree(self, text):
451 present_text = self._append_el.text
452 if present_text == None:
453 present_text = ''
454 self._append_el.text = present_text + text
456 def _add_text_el_to_tree(self):
457 element = self._make_element('mtext')
458 self._append_el.append(element)
459 self._append_el = element
461 def _end_text_el_to_tree(self):
462 self._append_el.attrib.pop('open')
463 self._append_el = self._get_parent(self._append_el)
465 def _add_special_text_to_tree(self, text):
467 adds if , and , or
470 element = self._make_element('mspace', **{'width':'1ex'})
471 self._append_el.append(element)
472 element = self._make_element('mo', text=text)
473 self._append_el.append(element)
474 element = self._make_element('mspace', **{'width':'1ex'})
475 self._append_el.append(element)
477 def _add_neg_num_to_tree(self, token, the_type):
478 groups = ['msup', 'msub', 'munderover', 'munder', 'mover', 'mroot', 'msqrt', 'mfrac']
479 if self._append_el.tag in groups:
480 element = self._make_element('mrow', **{'class':'neg-num'})
481 self._append_el.append(element)
482 append_el = element
483 else:
484 append_el = self._append_el
485 num = token[1:]
486 element = self._make_element('mo', text='-')
487 append_el.append(element)
488 element = self._make_element('mn', text=num)
489 append_el.append(element)
491 def _add_alpha_to_tree(self, token, the_type):
492 element = self._make_element('mi', text=token)
493 self._append_el.append(element)
495 def _add_symbol_to_tree(self, token, token_dict):
496 token = token_dict['symbol']
497 element = self._make_element('mi', text=token)
498 self._append_el.append(element)
500 def _add_operator_to_tree(self, token, token_info):
501 if isinstance(token_info, dict):
502 text = token_info.get('symbol')
503 else:
504 text = token
505 element = self._make_element('mo', text=text)
506 self._append_el.append(element)
508 def _do_matrix(self):
509 last_element = self._get_last_element()
510 is_matrix = self._is_matrix(last_element)
511 if not is_matrix:
512 return
513 the_open = last_element.get('open')
514 close = last_element.get('close')
515 the_class = last_element.get('class')
516 the_dict = {'open': the_open, 'close': close, 'separators':''}
517 if the_class:
518 the_dict['class'] = the_class
519 fenced = self._make_element('mfenced', **the_dict)
520 table = self._make_element('mtable')
521 fenced.append(table)
522 for child in last_element:
523 if self._is_full_fenced(child):
524 row = self._make_element('mtr')
525 table.append(row)
526 cell = self._make_element('mtd')
527 row.append(cell)
528 for gc in child:
529 if gc.tag != 'mo' or gc.text != ',':
530 cell.append(gc)
531 else:
532 cell = self._make_element('mtd')
533 row.append(cell)
534 self._append_el.remove(last_element)
535 self._append_el.append(fenced)
538 def _handle_binary(self, token, info):
539 last_element = self._get_last_element()
540 if last_element == self._append_el: # no "previous sibling," and can't process
541 self._add_operator_to_tree(token, info)
542 return
543 if token == '/':
544 num_frac = 0
545 if last_element.tag == 'mfrac':
546 for child in last_element:
547 if child.tag == 'mfrac':
548 num_frac +=1
549 if num_frac % 2 != 0:
550 self._append_el = last_element
551 last_element = self._get_last_element()
552 if self._is_full_fenced(last_element):
553 self._change_element(last_element, 'mrow', **{'class':'nominator'})
554 nominator = deepcopy(last_element)
555 self._append_el.remove(last_element)
556 mfrac = self._make_element('mfrac', nominator)
557 self._append_el.append(mfrac)
558 self._append_el = mfrac
559 elif token == '^' or token == '_':
560 if last_element.tag == 'msub' or last_element.tag == 'munder':
561 if last_element.tag == 'msub':
562 new_element = self._make_element('msubsup')
563 else:
564 new_element = self._make_element('munderover')
565 for child in last_element: # should be just 2--check?
566 element = deepcopy(child)
567 new_element.append(element)
568 self._append_el.remove(last_element)
569 self._append_el.append(new_element)
570 self._append_el = new_element
572 else:
573 if last_element.text in self.under_over_list and token == '^':
574 el_name = 'mover'
575 elif last_element.text in self.under_over_list and token == '_':
576 el_name = 'munder'
577 elif token == '^':
578 el_name = 'msup'
579 elif token == '_':
580 el_name = 'msub'
581 base = deepcopy(last_element)
582 self._append_el.remove(last_element)
583 base = self._make_element(el_name, base)
584 self._append_el.append(base)
585 self._append_el = base
588 def _handle_open_fence(self, token):
589 if self._use_fence:
590 element = self._make_element('mfenced', open=token, separators='', close="")
591 else:
592 element = self._make_element('mo', text=token)
593 self._append_el.append(element)
594 self._append_el = element
596 def _handle_close_fence(self, token):
597 first_match = self.fence_pair.get(token)
598 element = self._append_el
599 match_found = False
600 while element != None:
601 if element.tag == 'mfenced' and element.get('open') == first_match :
602 element.set('close', token)
603 parent = self._get_parent(element)
604 self._append_el = parent
605 match_found = True
606 break
607 elif element.tag == 'mfenced' and ( element.get('open') == '{:' or token == ':}'):
608 element.set('class','invisible')
609 parent = self._get_parent(element)
610 self._append_el = parent
611 match_found = True
612 break
613 element = self._get_parent(element)
615 if match_found:
616 return
617 if self._fenced_for_right:
618 element = self._make_element('mfenced', open='', separators='', close=token)
619 self._append_el.append(element)
620 else:
621 element = self._make_element('mo', text=token)
622 self._append_el.append(element)
624 def _handle_close_fence_old(self, token):
625 the_open = self._append_el.get('open')
626 first_match = self.fence_pair.get(token)
627 if self._append_el.tag == 'mfenced' and the_open == first_match :
628 self._append_el.set('close', token)
629 parent = self._get_parent(self._append_el)
630 self._append_el = parent
631 elif self._append_el.tag == 'mfenced' and ( the_open == '{:' or token == ':}'):
632 self._append_el.set('class','invisible')
633 parent = self._get_parent(self._append_el)
634 self._append_el = parent
635 else:
636 self._find_match_fence(self._append_el, token)
637 if self._fenced_for_right:
638 element = self._make_element('mfenced', open='', separators='', close=token)
639 self._append_el.append(element)
640 else:
641 element = self._make_element('mo', text=token)
642 self._append_el.append(element)
644 def _find_match_fence(self, element, close):
645 first_match = self.fence_pair.get(close)
646 while element != None:
647 print(element.tag)
648 if element.tag == 'mfenced' and element.get('open') == first_match :
649 self._append_el = element
650 element.set('close', close)
651 parent = self._get_parent(element)
652 self._append_el = parent
653 break
654 element = self._get_parent(element)
656 def _handle_double_single_bar(self, token, the_type):
657 if token == '||':
658 the_chr = u'\u2016'
659 elif token == '|':
660 the_chr = '|'
662 if self._append_el.tag == 'mfenced' and self._append_el.get('open') == the_chr:
663 self._append_el.set('close', the_chr)
664 parent = self._get_parent(self._append_el)
665 self._append_el = parent
666 else:
667 element = self._make_element('mfenced', open=the_chr, separators='', close="")
668 self._append_el.append(element)
669 self._append_el = element
672 def _handle_over(self, token):
673 element = self._make_element('mover', **{'class':token} )
674 self._append_el.append(element)
675 self._append_el = element
677 def _handle_under(self, token):
678 element = self._make_element('munder', **{'class':token} )
679 self._append_el.append(element)
680 self._append_el = element
682 def _handle_function(self, token):
683 if token == 'root':
684 element = self._make_element('mroot')
685 self._append_el.append(element)
686 self._append_el = element
687 elif token == 'stackrel':
688 element = self._make_element('mover', **{'class':'stackrel'})
689 self._append_el.append(element)
690 self._append_el = element
691 elif token == 'frac':
692 element = self._make_element('mfrac')
693 self._append_el.append(element)
694 self._append_el = element
695 elif token == 'sqrt':
696 element = self._make_element('msqrt')
697 self._append_el.append(element)
698 self._append_el = element
700 def _add_special_to_tree(self, token, the_type):
701 if token in self.open_fence_list:
702 self._handle_open_fence(token)
703 elif token in self.close_fence_list:
704 self._handle_close_fence(token)
705 elif token == '/' or token == '^' or token == '_':
706 self._handle_binary(token, the_type)
707 elif token == '||' or token == '|':
708 self._handle_double_single_bar(token, the_type)
709 elif token == '|':
710 self._handle_single_bar(token, the_type)
711 elif token in self.over_list:
712 self._handle_over(token)
713 elif token in self.under_list:
714 self._handle_under(token)
715 elif token in self.function_list:
716 self._handle_function(token)
718 def _add_fence_to_tree(self, token, the_type):
719 if token == '(:' or token == '<<':
720 token = u"\u2329"
721 if token == ':)' or token == '>>':
722 token = u"\u232a"
723 if token in self.open_fence_list:
724 self._handle_open_fence(token)
725 elif token in self.close_fence_list:
726 self._handle_close_fence(token)
728 def _fix_tree(self):
729 for e in self._tree.getiterator('mfenced'):
730 if e.get('close') == '' and e.get('class') != 'invisible' and not self._fenced_for_left:
731 self._fix_open_fence(e)
732 for e in self._tree.getiterator():
733 if e.tag == 'mfrac' and len(e) != 2:
734 element = self._make_element('mo')
735 while len(e) != 2:
736 element = self._make_element('mo')
737 e.insert(len(e), element)
738 elif e.tag == 'mroot' and len(e) != 2:
739 if len(e) == 1:
740 element = self._make_element('mo')
741 e.insert(0,element)
742 else:
743 while len(e) != 2:
744 element = self._make_element('mo')
745 e.insert(len(e), element)
746 elif (e.tag == 'mover' or e.tag == 'munder') and len(e) != 2:
747 char = self.over_dict.get(e.get('class'))
748 if not char:
749 char = self.under_dict.get(e.get('class'))
750 if len(e) == 0:
751 element = self._make_element('mo')
752 e.insert(0, element)
753 element = self._make_element('mo', text=char)
754 e.insert(1, element)
755 elif len(e) == 1:
756 element = self._make_element('mo')
757 e.insert(1, element)
758 elif (e.tag == 'munderover' or e.tag == 'msubsup') and len(e) != 3:
759 element = self._make_element('mo')
760 while len(e) != 3:
761 element = self._make_element('mo')
762 e.insert(len(e), element)
763 elif (e.tag == 'msup' or e.tag == 'msub') and len(e) != 2:
764 element = self._make_element('mo')
765 while len(e) != 2:
766 element = self._make_element('mo')
767 e.insert(len(e), element)
770 def parse_string(self, the_string):
772 Need Documentation!
776 while the_string != '':
777 the_string, token, token_info = self._parse_tokens(the_string)
778 if isinstance(token_info, str):
779 the_type = token_info
780 else:
781 the_type = token_info.get('type')
782 if the_type == 'text':
783 text = token
784 self._add_text_to_tree(text)
785 elif the_type == 'start_text':
786 self._add_text_el_to_tree()
787 elif the_type == 'end_text':
788 self._end_text_el_to_tree()
789 elif the_type == 'special_text':
790 self._add_special_text_to_tree(token)
791 elif the_type == 'number':
792 self._add_num_to_tree(token, the_type)
793 elif the_type == 'neg_number':
794 self._add_neg_num_to_tree(token, the_type)
795 elif the_type == 'alpha':
796 self._add_alpha_to_tree(token, the_type)
797 elif the_type == 'symbol':
798 self._add_symbol_to_tree(token, token_info)
799 elif the_type == 'operator':
800 self._add_operator_to_tree(token, token_info)
801 elif token in self.fence_list:
802 self._add_fence_to_tree(token, the_type)
803 elif the_type == 'special':
804 self._add_special_to_tree(token, the_type)
806 if self._append_el.tag == 'mover' and self._append_el.get('class') == 'stackrel' and len(self._append_el) == 2:
807 if self._is_full_fenced(self._append_el[0]):
808 self._change_element(self._append_el[0], 'mrow', **{'class':'top'})
809 if self._is_full_fenced(self._append_el[1]):
810 self._change_element(self._append_el[1], 'mrow', **{'class':'bottom'})
811 top = deepcopy(self._append_el[0])
812 self._append_el[0] = self._append_el[1]
813 self._append_el[1] = top
814 self._append_el = self._get_parent(self._append_el)
815 elif (self._append_el.tag == 'mover' or self._append_el.tag == 'munder')\
816 and self._append_el.get('class') in self.under_over_base_last and len(self._append_el) > 0:
817 last_element = self._get_last_element()
818 if self._is_full_fenced(last_element): # remove parenthesis
819 if self._append_el.tag == 'mover':
820 the_dict = {'class':'mover'}
821 if self._append_el.tag == 'munder':
822 the_dict = {'class':'munder'}
823 self._change_element(last_element, 'mrow', **the_dict)
824 text = self._append_el.get('class') # add top
825 if self._append_el.tag == 'mover':
826 text = self.over_dict.get(text)
827 elif self._append_el.tag == 'munder':
828 text = self.under_dict.get(text)
829 element = self._make_element('mo', text=text)
830 self._append_el.append(element)
831 self._append_el = self._get_parent(self._append_el)
832 elif self._append_el.tag == 'msqrt' and len(self._append_el) == 1:
833 if self._is_full_fenced(self._append_el[0]):
834 self._change_element(self._append_el[0], 'mrow', **{'class':'radical'})
835 self._append_el = self._get_parent(self._append_el)
836 elif self._append_el.tag == 'mroot' and len(self._append_el) == 2:
837 if self._is_full_fenced(self._append_el[0]):
838 self._change_element(self._append_el[0], 'mrow', **{'class':'index'})
839 if self._is_full_fenced(self._append_el[1]):
840 self._change_element(self._append_el[1], 'mrow', **{'class':'base'})
841 the_index = deepcopy(self._append_el[0])
842 self._append_el[0] = self._append_el[1]
843 self._append_el[1] = the_index
844 self._append_el = self._get_parent(self._append_el)
846 elif (self._append_el.tag == 'mfrac' or self._append_el.tag == 'msup' or\
847 self._append_el.tag == 'msub' or self._append_el.tag == 'munder'\
848 or self._append_el.tag == 'mover') and len(self._append_el) == 2:
849 if self._is_full_fenced(self._append_el[1]):
850 if self._append_el.tag == 'mfrac':
851 the_dict = {'class':'denominator'}
852 elif self._append_el.tag == 'msup':
853 the_dict = {'class':'superscript'}
854 elif self._append_el.tag == 'msub':
855 the_dict = {'class':'subcript'}
856 elif self._append_el.tag == 'munder':
857 the_dict = {'class':'munder'}
858 elif self._append_el.tag == 'mover':
859 the_dict = {'class':'mover'}
860 self._change_element(self._append_el[1], 'mrow', **the_dict)
861 self._append_el = self._get_parent(self._append_el)
862 elif self._append_el.tag =='msubsup' or self._append_el.tag == 'munderover':
863 last_element = self._get_last_element()
864 prev_sib = self._get_previous_sibling(last_element)
865 prev_prev_sib =self._get_previous_sibling(prev_sib)
866 if prev_prev_sib != None:
867 if self._is_full_fenced(last_element):
868 if self._append_el.tag == 'msubsup':
869 the_dict = {'class':'subsuper'}
870 else:
871 the_dict = {'class':'munderover'}
872 self._change_element(last_element, 'mrow', **the_dict)
873 self._append_el = self._get_parent(self._append_el)
875 last_element = self._get_last_element()
876 if last_element.tag == 'mfenced' and last_element.get('close') == ':}':
877 self._change_element(last_element, 'mrow', **{'class':'invisible'})
879 if self._is_full_fenced(last_element) and len(self._append_el)> 1 :
880 prev_sib = self._get_previous_sibling(last_element)
881 is_function = False
882 if prev_sib.text in self.group_func_list:
883 is_function = True
884 if prev_sib.tag == 'munderover':
885 if prev_sib[0].tag == 'mo' and prev_sib[0].text in self.group_func_list:
886 is_function = True
887 if is_function:
888 self._insert_mrow(last_element, 'function')
890 self._do_matrix()
891 self._fix_tree()
894 def _look_at_next_token(self, the_string):
895 the_string, token, the_type = self._parse_tokens(the_string)
896 return the_string, token, the_type
899 def _parse_tokens(self, the_string):
903 processes the string one token at a time. If a number is found, process
904 and return the number with the rest of the stirng.
906 Else, see if the string starts with a special symbol, and process and
907 return that with the rest of the string.
909 Else, get the next character, and process that with the rest of the string.
912 if self._append_el.tag == 'mtext':
913 next_char = the_string[0]
914 if not self._append_el.get('open'):
915 if next_char == ' ':
916 return the_string[1:], ' ', {'type':'empty_text'}
917 elif next_char == '(' or next_char == '{' or next_char == '[':
918 self._append_el.set('open', next_char)
919 return the_string[1:], ' ', {'type': 'empty_text'}
920 else: # false text; continue
921 self._append_el = self._get_parent(self._append_el)
922 else:
923 first_match = self.fence_pair.get(next_char)
924 if first_match:
925 return the_string[1:], next_char, {'type':'end_text'}
926 else:
927 return the_string[1:], next_char, {'type':'text'}
928 the_string = the_string.strip()
930 if the_string == '':
931 return None, None, None
933 match = self._number_re.match(the_string)
935 if match: # found a number
936 number = match.group(0)
937 if number[0] == '-':
938 return the_string[match.end():], number, 'neg_number'
939 else:
940 return the_string[match.end():], number, 'number'
942 for name in self.names:
943 if the_string.startswith(name):
944 the_found = the_string[:len(name)]
945 symbol = self.symbol_dict.get(the_found)
946 operator = self.operator_dict.get(the_found)
947 special = self.special_dict.get(the_found)
948 text = self.text_dict.get(the_found)
949 if the_found == 'text':
950 return the_string[len(name):], name, {'type': 'start_text'}
951 elif symbol != None:
952 return the_string[len(name):], name, {'type': 'symbol', 'symbol': symbol}
953 elif special != None:
954 return the_string[len(name):], name, special
955 elif operator != None:
956 return the_string[len(name):], name, {'type': 'operator', 'symbol': operator}
957 elif text != None:
958 return the_string[len(name):], name, {'type': 'special_text'}
960 # found either an operator or a letter
962 if the_string[0].isalpha():
963 return the_string[1:], the_string[0], 'alpha'
964 else:
965 return the_string[1:], the_string[0], 'operator'
def ascii_to_xml_string(the_string):
    """
    Convert an ASCII math string to a serialized MathML string.

    On Python 2 a byte string is first decoded as UTF-8.
    """
    running_py2 = sys.version_info < (3,)
    if running_py2 and isinstance(the_string, str):
        the_string = the_string.decode('utf8')
    converter = AsciiMathML()
    converter.parse_string(the_string)
    return converter.to_xml_string()
def ascii_to_math_tree(the_string):
    """
    Convert an ASCII math string to a MathML element tree.

    On Python 2 a byte string is first decoded as UTF-8.
    """
    running_py2 = sys.version_info < (3,)
    if running_py2 and isinstance(the_string, str):
        the_string = the_string.decode('utf8')
    converter = AsciiMathML()
    converter.parse_string(the_string)
    return converter.get_tree()