Poc més
[apertium.git] / apertium-forms-server / pair.py
blobae0dc8e8feaebb0d68616d27bc6646141bb3b3e7
1 #!/usr/bin/python2.5
2 # coding=utf-8
3 # -*- encoding: utf-8 -*-
5 import sys, string, codecs, xml, re, Ft, md5, cStringIO;
6 from Ft.Xml.Domlette import NonvalidatingReader;
7 from Ft.Xml.Domlette import Print, PrettyPrint;
8 from Ft.Xml.XPath import Evaluate;
# Re-wrap both standard streams in UTF-8 codec writers so that unicode text
# written to them is encoded as UTF-8 instead of the interpreter default.
_utf8_writer = codecs.getwriter('utf-8')
sys.stdout = _utf8_writer(sys.stdout)
sys.stderr = _utf8_writer(sys.stderr)
class Tag:
    """A named tag that carries a list of strings.

    Constructing a Tag echoes every list item to standard output, one per
    line, prefixed with '% '.
    """

    # Class-level default; replaced per instance in __init__.
    name = None

    def __init__(self, _name, _list):
        """Store the name and the list, then echo each list item."""
        self.name, self.list = _name, _list

        for entry in _list:
            # Single parenthesised argument: prints identically whether
            # `print` is a statement or a function.
            print('% ' + entry)

    def get_list(self):
        """Return the list object that was passed to the constructor."""
        return self.list
class Paradigm:
    """A named paradigm holding a list of (stem, symbol-list) pairs.

    Stems are appended with add_stem() and read back with get_stems().
    A gloss can optionally be attached with add_gloss().
    """

    # Class-level defaults.  `gloss` is declared here so that reading it
    # before add_gloss() has been called yields None instead of raising
    # AttributeError.
    name = None
    gloss = None

    def __init__(self, _name):
        """Create an empty paradigm called `_name`."""
        self.name = _name
        self.stems = []

    def add_gloss(self, _gloss):
        """Attach (or overwrite) the gloss of this paradigm."""
        self.gloss = _gloss

    def add_stem(self, _stem, _symlist):
        """Append one (`_stem`, `_symlist`) tuple to the stem list."""
        self.stems.append((_stem, _symlist))

    def get_stems(self):
        """Return the live list of (stem, symlist) tuples."""
        return self.stems
class Dictionary:
    """One dictionary document of a pair, kept in memory as a parsed DOM.

    An instance stores the DOM (`doc`), the path it is written back to
    (`file`), the side it plays in the pair (`side`; the value 'bidix' gets
    two extra hash tables), the paradigms registered per tag, and a table
    of paradigm hashes used to pick bilingual entry templates.

    Diagnostics are written to sys.stderr (and a few to sys.stdout), exactly
    as the text the original print statements produced.
    """

    # Class-level defaults; every one is overwritten in __init__.
    display = None
    language = None
    file = None
    side = None

    def __init__(self, _side, _language, _file, _doc, _tags, _templates):
        """Store the constructor arguments and create the empty lookup tables.

        `_tags` and `_templates` are stored by reference, not copied.
        """
        self.display = {}
        self.language = _language
        self.file = _file
        self.doc = _doc
        self.side = _side
        self.paradigms = {}
        self.glosses = {}
        self.tags = _tags
        self.hashes = {}
        self.templates = _templates

        # Only the bilingual dictionary carries per-side hash tables.
        if _side == 'bidix':
            self.hashes_left = {}
            self.hashes_right = {}

    def get_tags(self):
        """Return the tag mapping given to the constructor."""
        return self.tags

    def get_tag_by_tag(self, _tag):
        """Return the entry stored under `_tag`; raises KeyError if absent."""
        return self.tags[_tag]

    def get_paradigms(self):
        """Return the mapping tag -> {paradigm name -> Paradigm}."""
        return self.paradigms

    def get_glosses(self):
        """Return the mapping paradigm -> gloss."""
        return self.glosses

    def get_paradigms_by_tag(self, _tag):
        """Return the paradigms registered under `_tag` (KeyError if none)."""
        return self.paradigms[_tag]

    def get_paradigm(self, _name, _tag):
        """Return the Paradigm `_name` under `_tag`, loading its stems lazily.

        Returns None when no registered paradigm has that name.  On the first
        request the matching <pardef> is read from the DOM and one stem is
        added per <e> element: the text of <l> plus the dot-joined `n`
        attributes of the <s> elements below <r>.  Later requests return the
        cached object untouched.

        Raises KeyError when `_tag` was never registered.
        """
        paradigm = self.paradigms[_tag].get(_name)

        if paradigm is None:
            sys.stderr.write('We didn`t find the paradigm in the hash\n')
            # Fall back to comparing names one by one.
            for candidate in self.paradigms[_tag].values():
                if candidate.name == _name:
                    paradigm = candidate

            if paradigm is None:
                return None

        # paradigm stems already loaded
        if len(paradigm.stems) > 0:
            return paradigm

        sys.stderr.write('get_paradigm  %s %s\n' % (paradigm.name, _name))

        # Select the <pardef> by comparing attribute values instead of
        # splicing the name into an XPath string literal, which broke for
        # names that contain an apostrophe.
        matches = [node for node in self.doc.xpath('.//pardef')
                   if node.getAttributeNS(None, 'n') == _name]
        if not matches:
            sys.stderr.write('get_paradigm: no pardef named %s\n' % (_name,))
            return paradigm
        res = matches[0]

        evaluate = Ft.Xml.XPath.Evaluate
        # NOTE(review): assumes every <e> holds a <p> with <l> and <r>
        # children; an entry without them raises IndexError, as before.
        for entrada in evaluate('.//e', contextNode=res):
            pair = evaluate('.//p', contextNode=entrada)[0]

            # An empty <l/> has no child node, and a non-text child has no
            # nodeValue; both cases yield an empty stem.
            left = ''
            left_node = evaluate('.//l', contextNode=pair)[0].firstChild
            if left_node is not None:
                left = left_node.nodeValue
                if left is None:
                    left = ''

            right = evaluate('.//r', contextNode=pair)[0]

            symlist = ''
            for symbol in evaluate('.//s', contextNode=right):
                symbol_name = symbol.getAttributeNS(None, 'n')
                if symlist != '':
                    symlist = symlist + '.' + symbol_name
                else:
                    symlist = symlist + symbol_name

            sys.stderr.write('get_paradigm  %s %s\n' % (left, symlist))
            paradigm.add_stem(left, symlist)

        return paradigm

    def hash_paradigm(self, _paradigm, _tag):
        """Return '<_tag>.<md5 hex digest>' describing a <pardef> node.

        The digest is taken over str() of the set of (restriction, symbols)
        tuples, one tuple per <e> element, where `symbols` is every <s> `n`
        attribute followed by a dot.

        The digest input is kept byte-identical to the original so that
        hashes keep matching templates produced earlier.  For that reason the
        old `type(restriction) == None` check, which could never be true, is
        removed rather than repaired.
        """
        # hashlib replaces the deprecated md5 module and yields the same
        # digest; imported here so the block carries its own dependency.
        import hashlib

        evaluate = Ft.Xml.XPath.Evaluate
        paradigm_hash = []

        for entrada in evaluate('.//e', contextNode=_paradigm):
            restriction = entrada.getAttributeNS(None, 'r')

            symbols = ''
            for symbol in evaluate('.//s', contextNode=entrada):
                symbols = symbols + symbol.getAttributeNS(None, 'n') + '.'

            paradigm_hash.append((restriction, symbols))

        digest = hashlib.md5()
        digest.update(str(set(paradigm_hash)))

        return _tag + '.' + digest.hexdigest()

    def set_paradigms_by_tag(self, _tag):
        """Register every <pardef> whose name ends in '__<_tag>'.

        Replaces self.paradigms[_tag] with a fresh mapping name -> Paradigm
        and records each paradigm's hash in self.hashes.
        """
        print(self.side + ' set_paradigms_by_tag(' + _tag + ')')
        pardefs = self.doc.xpath('//pardef')
        self.paradigms[_tag] = {}

        # The tag is escaped so it is matched literally, never as regex
        # syntax.
        patron = re.compile('.*__' + re.escape(_tag) + '$')

        for pardef in pardefs:
            n = pardef.getAttributeNS(None, 'n')
            if patron.match(n):
                self.paradigms[_tag][n] = Paradigm(n)
                # NOTE(review): presumably `n` is a unicode object here, in
                # which case .decode() only succeeds for ASCII names unless
                # the default encoding was changed - confirm.
                self.hashes[n.decode('utf-8')] = self.hash_paradigm(pardef, _tag)

        sys.stdout.write('%s set  %s paradigms\n'
                         % (self.side, len(self.paradigms[_tag])))

    def set_display(self, _tag, _mode):
        """Set the display mode of `_tag`; None or '' selects 'all'."""
        if _mode is None or _mode == '':
            self.display[_tag] = 'all'
        else:
            self.display[_tag] = _mode

    def get_display_by_tag(self, _tag):
        """Return the display mode of `_tag`, or 'all' when none was set."""
        return self.display.get(_tag, 'all')

    def get_displays(self):
        """Return the mapping tag -> display mode."""
        return self.display

    def add_gloss(self, _tag, _paradigm, _gloss):
        """Record `_gloss` for `_paradigm`.  `_tag` is accepted but unused."""
        self.glosses[_paradigm] = _gloss

    def generate_monodix_entrada(self, _lemma, _paradigm, _restriction, _comment, _author):
        """Return the text of a monolingual <e> entry.

        The entry has an `r` attribute unless `_restriction` is 'none' or
        '', an <i> holding incondicional(_lemma, _paradigm), and a <par>
        reference.  A non-empty `_comment` is appended as an XML comment.
        """
        incondicional = self.incondicional(_lemma, _paradigm)

        sys.stderr.write('lemma: ' + _lemma + ', paradigm: ' + _paradigm
                         + ', comment: ' + _comment + ', author: ' + _author + '\n')

        # NOTE(review): values are spliced into markup without escaping, so
        # '<', '&' or '"' in an argument (or '--' in the comment) produces
        # malformed XML.  Left unchanged because callers may already escape.
        if _restriction == "none" or _restriction == '':
            entrada = '<e lm="' + _lemma + '" a="' + _author + '">' + "\n"
        else:
            entrada = ('<e r="' + _restriction + '" lm="' + _lemma
                       + '" a="' + _author + '">' + "\n")

        entrada = entrada + ' <i>' + incondicional + '</i>' + "\n"
        entrada = entrada + ' <par n="' + _paradigm + '"/>' + "\n"
        entrada = entrada + '</e>'

        if _comment != '':
            entrada = entrada + '<!-- ' + _comment + ' -->' + "\n"

        sys.stderr.write(entrada + '\n')

        return entrada

    def generate_generic_bidix_entrada(self, _lemma1, _lemma2, _tag, _restriction, _comment, _author):
        """Return the text of a bilingual <e> entry built without a template.

        Both sides get the lemma followed by one <s n="_tag"/> symbol.  The
        `r` attribute is omitted when `_restriction` is 'none' or ''.  A
        non-empty `_comment` is appended as an XML comment.
        """
        # NOTE(review): no XML escaping is applied to the arguments.
        if _restriction == "none" or _restriction == '':
            entrada = '<e a="' + _author + '">' + "\n"
        else:
            entrada = '<e r="' + _restriction + '" a="' + _author + '">' + "\n"

        _symbol_list_left = '<s n="' + _tag + '"/>'
        _symbol_list_right = '<s n="' + _tag + '"/>'

        entrada = entrada + ' <p>' + "\n"
        entrada = entrada + ' <l>' + _lemma1 + _symbol_list_left + '</l>' + "\n"
        entrada = entrada + ' <r>' + _lemma2 + _symbol_list_right + '</r>' + "\n"
        entrada = entrada + ' </p>' + "\n"
        entrada = entrada + '</e>' + "\n"

        if _comment != '':
            entrada = entrada + '<!-- ' + _comment + ' -->' + "\n"

        sys.stderr.write(entrada + '\n')

        return entrada

    def generate_bidix_entrada(self, _lemma1, _lemma2, _paradigm1, _paradigm2, _tag, _restriction, _comment, _author):
        """Return the text of a bilingual entry, using a template if possible.

        Returns '' when a lemma is empty or a paradigm is None.  For the tags
        'n', 'adj' and 'np' the hashes of both paradigms select a template
        from self.templates[left][right]; its 'lemma1' and 'lemma2'
        placeholders are replaced by the lemmas.  Otherwise, or when no
        template matches, the generic entry is returned.

        Raises KeyError when a paradigm name has no recorded hash.
        """
        sys.stderr.write('generate_bidix_entrada (' + self.side + ')\n')

        if _lemma1 == '' or _lemma2 == '' or _paradigm1 is None or _paradigm2 is None:
            return ''

        if _tag == 'n' or _tag == 'adj' or _tag == 'np':
            hash_left = self.hashes_left[_paradigm1.name]
            hash_right = self.hashes_right[_paradigm2.name]
            sys.stderr.write('left: ' + hash_left + '; right: ' + hash_right
                             + '; n. templates: ' + str(len(self.templates)) + '\n')

            if hash_left not in self.templates:
                sys.stderr.write('left hash not found in templates: ' + hash_left + '\n')
                return self.generate_generic_bidix_entrada(
                    _lemma1, _lemma2, _tag, _restriction, _comment, _author)

            if hash_right not in self.templates[hash_left]:
                sys.stderr.write('right hash not found in templates['
                                 + hash_left + ']: ' + hash_right + '\n')
                for key in self.templates[hash_left]:
                    sys.stderr.write('* ' + key + '\n')

                return self.generate_generic_bidix_entrada(
                    _lemma1, _lemma2, _tag, _restriction, _comment, _author)

            template = self.templates[hash_left][hash_right]
            # Substitute both placeholders in one pass.  Two chained
            # .replace() calls would rewrite a 'lemma2' that occurs inside
            # the text just inserted for 'lemma1'.
            lemmas = {'lemma1': _lemma1, 'lemma2': _lemma2}
            entrada = re.sub('lemma[12]',
                             lambda found: lemmas[found.group(0)],
                             template)

            sys.stderr.write(entrada + '\n')

            return entrada

        return self.generate_generic_bidix_entrada(
            _lemma1, _lemma2, _tag, _restriction, _comment, _author)

    def incondicional(self, _lemma, _paradigm):
        """Return `_lemma` with the paradigm's variable ending removed.

        When `_paradigm` has no '/', `_lemma` is returned untouched.
        Otherwise the number of characters between the first '/' and the
        next '_' is cut from the end of the lemma and the result is returned
        UTF-8 encoded.  Example: ('casa', 'cas/a__n') gives 'cas'.

        Byte strings are decoded as UTF-8 before counting characters;
        unicode arguments are used as they are.
        """
        if _paradigm.count('/') < 1:
            return _lemma

        text_type = type(u'')

        paradigm = _paradigm
        if not isinstance(paradigm, text_type):
            paradigm = paradigm.decode('utf-8')

        lemma = _lemma
        if not isinstance(lemma, text_type):
            lemma = lemma.decode('utf-8')

        bar_pos = paradigm.find('/')
        # Search from the slash onwards: an underscore earlier in the name
        # used to give a negative length, so nothing was stripped.
        und_pos = paradigm.find('_', bar_pos)
        suffix_len = (und_pos - bar_pos) - 1

        return lemma[0:(len(lemma) - suffix_len)].encode('utf-8')

    def append(self, _entrada):
        """Parse `_entrada` and append its first <e> to each 'main' section.

        `_entrada` is encoded as UTF-8, parsed as a standalone document, and
        the first <e> found is appended to every <section id="main"> of the
        DOM.  Nothing is written to disk; see commit().
        """
        sys.stderr.write('>  %s\n' % (self.file,))
        sys.stderr.write(self.side + ' append(\n')
        sys.stderr.write('%s\n' % (_entrada,))
        sys.stderr.write(')\n')

        for section in self.doc.xpath('.//section'):
            section_id = section.getAttributeNS(None, 'id')
            sys.stderr.write('+ section : ' + section_id + '\n')
            if section_id == 'main':
                sys.stderr.write('Appending to section....\n')
                child_doc = NonvalidatingReader.parseString(
                    _entrada.encode('utf-8'), 'urn:bogus:dummy')
                child_node = child_doc.xpath('.//e')[0]
                section.appendChild(child_node)
                sys.stderr.write('Appended.\n')

    def commit(self):
        """Serialise the DOM to self.file, overwriting the file."""
        sys.stderr.write('Writing out DOM to ' + self.file + '\n')
        f = open(self.file, 'w')
        try:
            Print(self.doc, stream=f)
        finally:
            # Release the handle even when serialisation fails.
            f.close()
        sys.stderr.write('Written.\n')
class Pair:
    """A language pair: a set of Dictionary objects keyed by side.

    The dictionaries are loaded at construction time from the <dictionary>
    elements found below `_parent`.  The sides 'left', 'bidix' and 'right'
    must all be present; the methods index them directly and raise KeyError
    otherwise.
    """

    # Class-level default; replaced per instance in __init__.
    name = None

    def __init__(self, _working, _name, _parent):
        """Store the arguments, create empty tables and load dictionaries.

        `_working` is the directory that holds 'cache/<name>/<file>';
        `_parent` is the node searched for <dictionary> elements.
        """
        self.working = _working
        self.name = _name
        self.parent = _parent
        # This instance attribute hides the dictionary() method on
        # instances; existing code indexes it directly.
        self.dictionary = {}
        self.cache = {}
        self.tags = {}
        self.templates = {}

        self.populate()

    def populate(self):
        """Parse every <dictionary> file and build its Dictionary object.

        Each element's `n`, `side` and `file` attributes are read; the file
        is loaded from '<working>/cache/<name>/<file>'.  Afterwards the
        bidix dictionary is given references to the left and right hash
        tables, so later additions to those tables are visible through it.
        """
        print(self.name + ' populate()')
        for dictionary in self.parent.xpath('.//dictionary'):
            current_dict = dictionary.getAttributeNS(None, 'n')
            side = dictionary.getAttributeNS(None, 'side')
            filename = dictionary.getAttributeNS(None, 'file')
            filename = self.working + '/cache/' + self.name + '/' + filename

            print(' % (' + current_dict + ') ' + side + ', ' + filename)
            doc = NonvalidatingReader.parseUri('file:///' + filename)
            self.dictionary[side] = Dictionary(
                side, current_dict, filename, doc, self.tags, self.templates)

        # Runs once, after the loop, when all three sides exist.
        self.dictionary['bidix'].hashes_left = self.dictionary['left'].hashes
        self.dictionary['bidix'].hashes_right = self.dictionary['right'].hashes

    def dictionary(self, _side):
        """Return the Dictionary stored for `_side`.

        The original body read `self.dictionaries`, an attribute that is
        never created, so every call raised AttributeError.  It now reads
        the `dictionary` mapping set up in __init__.

        Because that mapping has the same name as this method, the method is
        only reachable through the class: Pair.dictionary(pair, side).
        """
        return self.dictionary[_side]

    def set_templates(self, _templates):
        """Install `_templates` on the pair and on all three dictionaries."""
        sys.stderr.write('Loaded ' + str(len(_templates)) + ' templates\n')
        self.templates = _templates

        for side in ('left', 'bidix', 'right'):
            self.dictionary[side].templates = _templates

    def add_tag(self, _name, _list):
        """Register a Tag and index the matching left/right paradigms."""
        print('add_tag(' + _name + ')')
        self.tags[_name] = Tag(_name, _list)
        self.dictionary['left'].set_paradigms_by_tag(_name)
        self.dictionary['right'].set_paradigms_by_tag(_name)

    def get_tags(self):
        """Return the mapping tag name -> Tag."""
        return self.tags

    def commit(self):
        """Write the left, bidix and right dictionaries to disk, in order."""
        sys.stderr.write(self.name + ' commit()\n')

        for side in ('left', 'bidix', 'right'):
            self.dictionary[side].commit()