Changing functionality and updating readme
[apertium.git] / apertium-forms-server / pair.py
blob2dbba1690ca2a95e0edb9bbc02ae01f8505e4e27
1 #!/usr/bin/python2.5
2 # coding=utf-8
3 # -*- encoding: utf-8 -*-
5 import sys, string, codecs, xml, re, Ft;
6 from Ft.Xml.Domlette import NonvalidatingReader;
7 from Ft.Xml.Domlette import Print, PrettyPrint;
8 from Ft.Xml.XPath import Evaluate;
# Rebind both standard streams to UTF-8 encoding writers so that unicode text
# written to them is encoded as UTF-8 on output.
sys.stdout, sys.stderr = map(codecs.getwriter('utf-8'), (sys.stdout, sys.stderr))
class Tag:
    """A name paired with a list of strings.

    Creating a Tag writes every entry of the list to standard output, one
    per line, each prefixed with '% '.
    """

    name = None

    def __init__(self, _name, _list):
        """Store the name and the list, then echo the list entries.

        _name -- identifier of the tag.
        _list -- iterable of strings; each entry is concatenated onto '% '
                 when echoed, so a non-string entry raises TypeError.
        """
        self.name = _name
        self.list = _list

        emit = sys.stdout.write
        for entry in self.list:
            emit('% ' + entry + '\n')

    def get_list(self):
        """Return the list object that was passed to the constructor."""
        return self.list
class Paradigm:
    """A named paradigm that accumulates (stem, symbol-list) pairs.

    The `gloss` attribute only exists after add_gloss() has been called.
    """

    name = None

    def __init__(self, _name):
        """Create a paradigm called _name with no stems recorded yet."""
        self.stems = []
        self.name = _name

    def add_gloss(self, _gloss):
        """Attach (or replace) the gloss stored on this paradigm."""
        self.gloss = _gloss

    def add_stem(self, _stem, _symlist):
        """Record one (_stem, _symlist) tuple at the end of the stem list."""
        entry = (_stem, _symlist)
        self.stems.append(entry)

    def get_stems(self):
        """Return the live list of (stem, symlist) tuples recorded so far."""
        return self.stems
class Dictionary:
    """One dictionary of a language pair, wrapping a parsed XML document.

    Keeps the document together with per-tag paradigm tables, per-tag display
    modes and glosses. It can build new entries as XML text, append an entry
    to the document's section whose id is "main", and write the document back
    to its file.

    Diagnostics are emitted with explicit write() calls on sys.stdout /
    sys.stderr; the text matches what the equivalent print statements emit.
    """

    display = None
    language = None
    file = None
    side = None

    def __init__(self, _side, _language, _file, _doc, _tags):
        """Store the constructor arguments and start with empty tables.

        _side     -- key identifying this dictionary; used as a log prefix.
        _language -- value stored as `language`.
        _file     -- path the document is written to by commit().
        _doc      -- parsed document; must offer an xpath() method.
        _tags     -- mapping returned by get_tags() / indexed by
                     get_tag_by_tag(); stored by reference, not copied.
        """
        self.display = {}
        self.language = _language
        self.file = _file
        self.doc = _doc
        self.side = _side
        self.paradigms = {}
        self.glosses = {}
        self.tags = _tags

    def get_tags(self):
        """Return the tag mapping given to the constructor."""
        return self.tags

    def get_tag_by_tag(self, _tag):
        """Return the entry stored under _tag in the tag mapping."""
        return self.tags[_tag]

    def get_paradigms(self):
        """Return the {tag: {paradigm name: paradigm}} table."""
        return self.paradigms

    def get_glosses(self):
        """Return the {paradigm: gloss} mapping filled by add_gloss()."""
        return self.glosses

    def get_paradigms_by_tag(self, _tag):
        """Return the {name: paradigm} table registered for _tag."""
        return self.paradigms[_tag]

    def get_paradigm(self, _name, _tag):
        """Return the paradigm called _name under _tag, loading its stems.

        Returns None when no paradigm with that name is registered for _tag
        (a missing _tag table itself raises KeyError). If the paradigm
        already has stems it is returned as is; otherwise its <pardef>
        element is looked up in the document and every <e> entry contributes
        one stem: the text of <l> plus the dot-joined `n` attributes of the
        <s> elements found under <r>.

        Entries that have no <p>, <l> or <r> element are skipped instead of
        aborting the whole load with an IndexError.
        """
        candidates = self.paradigms[_tag]
        paradigm = candidates.get(_name)

        if paradigm is None:
            sys.stderr.write('We didn`t find the paradigm in the hash\n')
            # Fall back to comparing names; the last match wins.
            for candidate in candidates.values():
                if candidate.name == _name:
                    paradigm = candidate

            if paradigm is None:
                return None

        # Paradigm stems already loaded.
        if len(paradigm.stems) > 0:
            return paradigm

        sys.stderr.write('get_paradigm  %s %s\n' % (paradigm.name, _name))

        # NOTE(review): the name is pasted into the XPath unquoted, so a name
        # containing an apostrophe would yield an invalid expression.
        path = ".//pardef[@n='" + _name + "']"
        res = self.doc.xpath(path)[0]

        evaluate = Ft.Xml.XPath.Evaluate
        for entrada in evaluate('.//e', contextNode=res):
            pairs = evaluate('.//p', contextNode=entrada)
            if not pairs:
                continue
            pair = pairs[0]

            lefts = evaluate('.//l', contextNode=pair)
            rights = evaluate('.//r', contextNode=pair)
            if not lefts or not rights:
                continue

            # An empty <l/>, or one whose first child carries no text value,
            # contributes the empty string.
            left = None
            first_child = lefts[0].firstChild
            if first_child is not None:
                left = first_child.nodeValue
            if left is None:
                left = ''

            symlist = ''
            for symbol in evaluate('.//s', contextNode=rights[0]):
                symbol_name = symbol.getAttributeNS(None, 'n')
                if symlist != '':
                    symlist = symlist + '.' + symbol_name
                else:
                    symlist = symbol_name

            sys.stderr.write('get_paradigm  %s %s\n' % (left, symlist))
            paradigm.add_stem(left, symlist)

        return paradigm

    def set_paradigms_by_tag(self, _tag):
        """(Re)build the paradigm table for _tag from the document.

        Every <pardef> whose `n` attribute matches the regular expression
        '.*__' + _tag + '$' gets a fresh, stem-less Paradigm registered
        under its name. Any table previously stored for _tag is replaced.
        """
        sys.stdout.write(self.side + ' set_paradigms_by_tag(' + _tag + ')\n')

        pardefs = self.doc.xpath('//pardef')
        table = {}
        self.paradigms[_tag] = table

        # NOTE(review): _tag is inserted into the pattern unescaped.
        patron = re.compile('.*__' + _tag + '$')
        for pardef in pardefs:
            n = pardef.getAttributeNS(None, 'n')
            if patron.match(n):
                table[n] = Paradigm(n)

        sys.stdout.write(self.side + ' set  ' + str(len(table)) + ' paradigms\n')

    def set_display(self, _tag, _mode):
        """Set the display mode of _tag; None or '' is stored as 'all'."""
        if _mode is None or _mode == '':
            self.display[_tag] = 'all'
        else:
            self.display[_tag] = _mode

    def get_display_by_tag(self, _tag):
        """Return the display mode of _tag, or 'all' when none was set."""
        if _tag in self.display:
            return self.display[_tag]
        return 'all'

    def get_displays(self):
        """Return the {tag: display mode} mapping."""
        return self.display

    def add_gloss(self, _paradigm, _gloss):
        """Store _gloss under the key _paradigm."""
        self.glosses[_paradigm] = _gloss

    def generate_monodix_entrada(self, _lemma, _paradigm, _restriction, _comment, _author):
        """Build the XML text of a monolingual entry and return it.

        The entry is an <e> element carrying `lm` and `a` attributes (plus
        `r` unless _restriction is 'none' or ''), an <i> holding the stem
        computed by incondicional(), and a <par> naming the paradigm. A
        non-empty _comment is appended as an XML comment after </e>.

        NOTE(review): the values are inserted without XML escaping, so
        characters such as '"', '<' or '&' (or '--' in the comment) give
        malformed XML; whether callers pre-escape is not visible here.
        """
        incondicional = self.incondicional(_lemma, _paradigm)

        sys.stderr.write('lemma: ' + _lemma + ', paradigm: ' + _paradigm +
                         ', comment: ' + _comment + ', author: ' + _author + '\n')

        if _restriction == "none" or _restriction == '':
            entrada = '<e lm="' + _lemma + '" a="' + _author + '">' + "\n"
        else:
            entrada = ('<e r="' + _restriction + '" lm="' + _lemma +
                       '" a="' + _author + '">' + "\n")

        entrada = entrada + ' <i>' + incondicional + '</i>' + "\n"
        entrada = entrada + ' <par n="' + _paradigm + '"/>' + "\n"
        entrada = entrada + '</e>'

        if _comment != '':
            entrada = entrada + '<!-- ' + _comment + ' -->' + "\n"

        sys.stderr.write(entrada + '\n')

        return entrada

    def generate_bidix_entrada(self, _lemma1, _lemma2, _paradigm1, _paradigm2, _tag, _restriction, _comment, _author):
        """Build the XML text of a bilingual entry and return it.

        Returns '' when either lemma is empty or either paradigm is None.
        For the tags 'n', 'vblex', 'adj' and 'adv' both sides get a single
        <s n="TAG"/> symbol after the lemma; any other tag gets no symbol.
        For 'n' the stems of both paradigms (objects offering get_stems())
        are also inspected, but only to log their tag sets when the two
        stem lists have the same length.

        NOTE(review): as in generate_monodix_entrada, nothing is escaped.
        """
        if _lemma1 == '' or _lemma2 == '' or _paradigm1 is None or _paradigm2 is None:
            return ''

        _symbol_list_left = ''
        _symbol_list_right = ''

        if _tag == 'n':
            stems_left = _paradigm1.get_stems()
            stems_right = _paradigm2.get_stems()

            if len(stems_left) == len(stems_right):
                tags_left = set()
                for stem in stems_left:
                    tags_left |= set(stem[1].split('.'))

                tags_right = set()
                for stem in stems_right:
                    tags_right |= set(stem[1].split('.'))

                sys.stderr.write('tags_left: %s\n' % (tags_left,))
                sys.stderr.write('tags_right: %s\n' % (tags_right,))
                sys.stderr.write('symdiff: %s\n' % (tags_left ^ tags_right,))

        if _tag in ('n', 'vblex', 'adj', 'adv'):
            _symbol_list_left = '<s n="' + _tag + '"/>'
            _symbol_list_right = '<s n="' + _tag + '"/>'

        if _restriction == "none" or _restriction == '':
            entrada = '<e a="' + _author + '">' + "\n"
        else:
            entrada = '<e r="' + _restriction + '" a="' + _author + '">' + "\n"

        entrada = entrada + ' <p>' + "\n"
        entrada = entrada + ' <l>' + _lemma1 + _symbol_list_left + '</l>' + "\n"
        entrada = entrada + ' <r>' + _lemma2 + _symbol_list_right + '</r>' + "\n"
        entrada = entrada + ' </p>' + "\n"
        entrada = entrada + '</e>' + "\n"

        if _comment != '':
            entrada = entrada + '<!-- ' + _comment + ' -->' + "\n"

        sys.stderr.write(entrada + '\n')

        return entrada

    def incondicional(self, _lemma, _paradigm):
        """Return the part of _lemma left after removing the paradigm suffix.

        The suffix length is the number of characters between the first '/'
        of _paradigm and the first '_' that follows it. A paradigm name
        without '/' leaves the lemma untouched (and it is returned as is).

        Byte strings are treated as UTF-8 so the suffix is counted in
        characters; unicode values are used directly instead of being passed
        through .decode(), which fails on non-ASCII unicode. The result has
        the same type as _lemma: bytes in, UTF-8 bytes out; unicode in,
        unicode out.
        """
        if _paradigm.count('/') < 1:
            return _lemma

        text_type = type(u'')
        lemma_is_text = isinstance(_lemma, text_type)

        paradigm = _paradigm
        if not isinstance(paradigm, text_type):
            paradigm = paradigm.decode('utf-8')

        lemma = _lemma
        if not lemma_is_text:
            lemma = lemma.decode('utf-8')

        bar_pos = paradigm.find('/')
        # Look for the underscore after the slash only, so an underscore in
        # the part before the slash cannot produce a negative suffix length.
        und_pos = paradigm.find('_', bar_pos)
        suffix_len = (und_pos - bar_pos) - 1

        if suffix_len <= 0:
            stem = lemma
        else:
            # Clamp at zero: a negative end index would wrap around and keep
            # characters of a lemma that is shorter than the suffix.
            stem = lemma[0:max(len(lemma) - suffix_len, 0)]

        if lemma_is_text:
            return stem
        return stem.encode('utf-8')

    def append(self, _entrada):
        """Parse _entrada and append its first <e> to each "main" section.

        _entrada is encoded to UTF-8, parsed as a standalone document, and
        the first <e> element found in it is appended as the last child of
        every <section> of this document whose `id` attribute is 'main'.
        Anything else in _entrada (such as a trailing comment) is not copied.
        """
        sys.stderr.write('>  %s\n' % (self.file,))
        sys.stderr.write(self.side + ' append(' + '\n')
        sys.stderr.write(_entrada + '\n')
        sys.stderr.write(')\n')

        for section in self.doc.xpath('.//section'):
            sys.stderr.write('+ section : ' + section.getAttributeNS(None, 'id') + '\n')
            if section.getAttributeNS(None, 'id') == 'main':
                sys.stderr.write('Appending to section....\n')
                child_doc = NonvalidatingReader.parseString(_entrada.encode('utf-8'), 'urn:bogus:dummy')
                child_node = child_doc.xpath('.//e')[0]
                section.appendChild(child_node)
                sys.stderr.write('Appended.\n')

    def commit(self):
        """Serialise the document to self.file, overwriting its contents.

        The file is closed even when serialisation raises, so a failed write
        no longer leaks the open handle.
        """
        sys.stderr.write('Writing out DOM to ' + self.file + '\n')
        f = open(self.file, 'w')
        try:
            Print(self.doc, stream=f)
        finally:
            f.close()
        sys.stderr.write('Written.\n')
class Pair:
    """A language pair: its dictionaries keyed by side, plus its tags.

    On construction every <dictionary> element found under the parent node
    is parsed from `<working>/cache/<name>/<file>` and wrapped in a
    Dictionary stored in the `dictionary` mapping under its `side`.
    """

    name = None

    def __init__(self, _working, _name, _parent):
        """Remember the settings and load the dictionaries.

        _working -- directory prefix; dictionary files are read from
                    _working + '/cache/' + _name + '/'.
        _name    -- name of the pair; also used as a log prefix.
        _parent  -- node offering xpath(); its './/dictionary' descendants
                    describe the dictionaries (attributes n, side, file).
        """
        self.working = _working
        self.name = _name
        self.parent = _parent
        self.dictionary = {}
        self.cache = {}
        self.tags = {}

        self.populate()

    def populate(self):
        """Parse each configured dictionary file and register it by side."""
        sys.stdout.write(self.name + ' populate()\n')
        for node in self.parent.xpath('.//dictionary'):
            current_dict = node.getAttributeNS(None, 'n')
            side = node.getAttributeNS(None, 'side')
            filename = node.getAttributeNS(None, 'file')
            filename = self.working + '/cache/' + self.name + '/' + filename

            sys.stdout.write(' % (' + current_dict + ') ' + side + ', ' + filename + '\n')
            doc = NonvalidatingReader.parseUri('file:///' + filename)
            # Every Dictionary shares this pair's tag mapping by reference.
            self.dictionary[side] = Dictionary(side, current_dict, filename, doc, self.tags)

    def get_dictionary(self, _side):
        """Return the Dictionary registered for _side (KeyError if absent)."""
        return self.dictionary[_side]

    def dictionary(self, _side):
        """Return the Dictionary registered for _side.

        On instances this name is shadowed by the `dictionary` mapping
        assigned in __init__, so the method is only reachable as
        Pair.dictionary(pair, side); use get_dictionary() or index the
        mapping directly. It now reads the mapping that actually exists
        instead of a `dictionaries` attribute that is never assigned.
        """
        return self.get_dictionary(_side)

    def add_tag(self, _name, _list):
        """Register a Tag and build its paradigm tables on both sides.

        Requires dictionaries for the sides 'left' and 'right' to be loaded.
        """
        sys.stdout.write('add_tag(' + _name + ')\n')
        self.tags[_name] = Tag(_name, _list)
        for side in ('left', 'right'):
            self.dictionary[side].set_paradigms_by_tag(_name)

    def get_tags(self):
        """Return the {name: Tag} mapping of this pair."""
        return self.tags

    def commit(self):
        """Write out the 'left', 'bidix' and 'right' dictionaries, in order."""
        sys.stderr.write(self.name + ' commit()\n')

        for side in ('left', 'bidix', 'right'):
            self.dictionary[side].commit()