8 return "["+("".join([("\\." if x
is "." else x
) for x
in xs
]))+"]"
10 #tree structure: top level chooses between initials (perhaps blank), next level groups, and so on
def getChild(self, ch):
    """Return the child node stored under ``ch``, creating it on first use.

    ch -- key used to index ``self.children``.

    When ``ch`` is not yet a key of ``self.children``, a fresh ``Group()``
    is stored under it first; the stored object is then returned, so
    repeated calls with the same key hand back the same node.
    """
    if ch not in self.children:
        # First time this key is seen at this level: start an empty subtree.
        self.children[ch] = Group()
    return self.children[ch]
21 #lists should contain (a list of ordered initials), then (a list of groups), then finals, then tones,
22 # but only starting with whichever will be required from this level of the tree down.
23 def write(self
, indent
, name
, lists
):
26 s
='&#x'+hex(ord(sym
))[2:]+';'
27 print indent
+ '<s d="'+s
+'" t="'+s
+'"/>';
30 if ch
not in self
.children
:
34 desc
=indent
+ '<group label="" visible="no"'
36 n
='&#x'+string
.upper(hex(ord(ch
))[2:])+';' #hex() returns "0x..."
37 desc
=indent
+ '<group label="'+n
+'"'
38 if self
.children
[ch
].syms
:
39 desc
+=' name="'+name
+n
+'"'
41 self
.children
[ch
].write(indent
+" ",name
+n
,lists
[1:]);
43 print indent
+ "</group>";
44 for ch
in self
.children
:
45 print "WARNING Did not output",(["tone","final","group","initial"][len(lists
)-1]),ch
.encode("utf-8")
50 keypat
= re
.compile("([^ ]+) +([^ \r\n]+)",re
.UNICODE
);
53 with
open(sys
.argv
[1]) as file:
55 if (re
.match("%keyname",line
) is not None):
57 #line should be the keyname begin.
59 if (re
.match("%keyname",line
) is not None): break
60 m
= keypat
.match(unicode(line
,"utf-8"));
62 die("Error, could not parse keyname"+line
)
64 keys
[m
.group(1)] = m
.group(2);
65 #line should be the keyname end
72 for key
,value
in keys
.items():
74 if v
==0x3127 or v
==0x3128 or v
==0x3129:
76 elif v
>=0x3105 and v
<0x311A:
78 elif v
>=0x311A and v
<=0x3127:
80 elif v
==0x02CA or v
==0x02C7 or v
==0x02CB or v
==0x02D9: #0x30FB is a big version of 02d9
83 die("**ERROR** unknown bopomofo "+key
+" = "+value
)
84 pat
= "("+regor(initials
)+"?)("+regor(groups
)+"?)("+regor(finals
)+"?)("+regor(tones
)+"?)[\s\t]*([^\s\t]+)\n"
86 pattern
=re
.compile(pat
,re
.UNICODE
);
89 if (re
.match("%chardef +begin",line
) is not None):
91 die("Expected chardef begin, got "+line
)
96 if (re
.match("%chardef +end",line
) is not None): break
97 m
=pattern
.match(unicode(line
,"utf-8"))
99 root
.getChild(keys
[m
.group(1)]).getChild(keys
[m
.group(2)]).getChild(keys
[m
.group(3)]).getChild(keys
[m
.group(4)]).addSym(m
.group(5))
101 die("**ERROR** matching "+line
)
104 lst
=[keys
[x
] for x
in lst
];
109 root
.write("","",[ch(x
) for x
in [initials
,groups
,finals
,tones
]])