Update Swedish translation
[dasher.git] / Data / bpmf-trad-mand.py
blob06288a318e94499fc3ec2aeb75acfa2e394def1f
1 #!/usr/bin/python
import io
import re
import string
import sys
def regor(xs):
    """Return a regex character class matching any single item of xs.

    Every item is passed through re.escape, so keys that are special inside
    a character class ("-", "]", "^", "\\") or elsewhere (".") are matched
    literally instead of forming ranges or closing the class early.

    xs -- iterable of strings (one keyboard character each, as used by the
          callers in this script).

    Returns the pattern text, e.g. regor(["a", "."]) == "[a\\.]". When xs
    contributes no characters, a class that can never match is returned,
    because "[]" is not a complete character class and would swallow
    whatever pattern text follows it.
    """
    escaped = "".join(re.escape(x) for x in xs)
    if not escaped:
        # Nothing to choose from: \s and \S are complementary, so their
        # negated union matches no character at all.
        return r"[^\s\S]"
    return "[" + escaped + "]"
# Tree structure: the top level chooses between initials (perhaps blank),
# the next level between groups, and so on.
class Group:
    """One node of the symbol tree that is written out as nested XML.

    syms     -- list of single characters stored directly at this node.
    children -- dict mapping a child's key (a one-character string, or ""
                for "blank") to the child Group.
    """

    def __init__(self):
        self.syms = []
        self.children = {}

    def getChild(self, ch):
        """Return the child stored under key ch, creating it if necessary."""
        if ch not in self.children:
            self.children[ch] = Group()
        return self.children[ch]

    # lists should contain (a list of ordered initials), then (a list of
    # groups), then finals, then tones, but only starting with whichever
    # will be required from this level of the tree down.
    def write(self, indent, name, lists):
        """Print this subtree as XML on standard output.

        indent -- whitespace prefix for every line written at this level.
        name   -- concatenated entity references of the labels on the path
                  from the root down to this node.
        lists  -- one list of child keys per remaining tree level, each in
                  the order the children are to be written. When it is
                  empty this node's symbols are written as <s> elements.

        Children are removed from self.children as they are written, so a
        tree can only be written once. Children whose key is missing from
        lists[0] are not written; each one is reported on standard error
        so that the diagnostic does not end up inside the XML output.
        """
        if not lists:
            for sym in self.syms:
                s = '&#x' + hex(ord(sym))[2:] + ';'  # hex() returns "0x..."
                print(indent + '<s d="' + s + '" t="' + s + '"/>')
            return

        for ch in lists[0]:
            if ch not in self.children:
                continue
            child = self.children[ch]
            if ch == "":
                # A blank choice becomes an invisible, unlabelled group.
                n = ""
                desc = indent + '<group label="" visible="no"'
            else:
                n = '&#x' + hex(ord(ch))[2:].upper() + ';'
                desc = indent + '<group label="' + n + '"'
            if child.syms:
                # Only groups that hold symbols directly get a name.
                desc += ' name="' + name + n + '"'
            print(desc + '>')
            child.write(indent + " ", name + n, lists[1:])
            del self.children[ch]
            print(indent + "</group>")

        # Whatever is still present was not listed in lists[0].
        level = ["tone", "final", "group", "initial"][len(lists) - 1]
        for ch in self.children:
            # On Python 2 the keys are unicode objects and must be encoded
            # before being written; native str keys are written as they are.
            label = ch if isinstance(ch, str) else ch.encode("utf-8")
            sys.stderr.write(
                "WARNING Did not output " + level + " " + label + "\n")

    def addSym(self, sym):
        """Append every character of the string sym as a separate symbol."""
        self.syms.extend(sym)
def die(msg):
    """Report msg on standard error and stop the script with exit status 1."""
    if not isinstance(msg, str):
        # Python 2 unicode message: encode it before writing to the stream.
        msg = msg.encode("utf-8")
    sys.stderr.write(msg + "\n")
    sys.exit(1)


# One line of the %keyname section: a key, spaces, then its symbol.
keypat = re.compile("([^ ]+) +([^ \r\n]+)", re.UNICODE)
keys = {}

# The input file (first command-line argument) is decoded from UTF-8 once,
# while reading, so every line below is already text.
with io.open(sys.argv[1], encoding="utf-8") as cin_file:
    # Skip everything up to the line that opens the %keyname section.
    for line in cin_file:
        if re.match("%keyname", line) is not None:
            break
    # Collect key -> symbol pairs until the line that closes the section.
    for line in cin_file:
        if re.match("%keyname", line) is not None:
            break
        m = keypat.match(line)
        if m is None:
            die("Error, could not parse keyname" + line)
        keys[m.group(1)] = m.group(2)

    # Sort the keys into the four tree levels by the code point of the
    # symbol each key produces.
    groups = []
    initials = []
    finals = []
    tones = []

    for key, value in keys.items():
        if len(value) != 1:
            # ord() below needs exactly one character.
            die("**ERROR** unknown bopomofo " + key + " = " + value)
        v = ord(value)
        if v in (0x3127, 0x3128, 0x3129):
            groups.append(key)
        elif 0x3105 <= v < 0x311A:
            initials.append(key)
        elif 0x311A <= v <= 0x3126:
            finals.append(key)
        elif v in (0x02CA, 0x02C7, 0x02CB, 0x02D9):
            # 0x30FB is a big version of 02d9
            tones.append(key)
        else:
            die("**ERROR** unknown bopomofo " + key + " = " + value)

    # A chardef line: optional initial, group, final and tone keys, then
    # whitespace, then the character(s) the key sequence produces.
    pat = ("(" + regor(initials) + "?)(" + regor(groups) + "?)("
           + regor(finals) + "?)(" + regor(tones)
           + r"?)[\s\t]*([^\s\t]+)\n")
    pattern = re.compile(pat, re.UNICODE)

    # The line after the %keyname section must open the %chardef section.
    for line in cin_file:
        if re.match("%chardef +begin", line) is not None:
            break
        die("Expected chardef begin, got " + line)

    root = Group()
    keys[""] = ""  # an absent component selects the blank child
    for line in cin_file:
        if re.match("%chardef +end", line) is not None:
            break
        m = pattern.match(line)
        if m is None:
            die("**ERROR** matching " + line)
        node = root
        for part in m.group(1, 2, 3, 4):
            # Descend initial -> group -> final -> tone.
            node = node.getChild(keys[part])
        node.addSym(m.group(5))
def ch(lst):
    """Return the values that the module-level keys dict holds for lst.

    Each element of lst is looked up in keys; the results are sorted and
    returned in a new list with the empty string placed first.
    """
    ordered = sorted(keys[k] for k in lst)
    return [""] + ordered
# Write the whole tree, passing one ordered list of child keys per level
# (initials, groups, finals, tones), each prepared by ch().
level_orders = [ch(level) for level in [initials, groups, finals, tones]]
root.write("", "", level_orders)