3 # -*- encoding: utf-8 -*-
5 import sys
, string
, codecs
, xml
, re
, Ft
, md5
, cStringIO
;
6 from Ft
.Xml
.Domlette
import NonvalidatingReader
;
7 from Ft
.Xml
.Domlette
import Print
, PrettyPrint
;
8 from Ft
.Xml
.XPath
import Evaluate
;
# Rebind the standard streams to UTF-8 stream writers (codecs.getwriter
# returns the StreamWriter class for the codec), so text written through
# sys.stdout / sys.stderr is encoded as UTF-8 on output.
sys.stdout = codecs.getwriter('utf-8')(sys.stdout);
sys.stderr = codecs.getwriter('utf-8')(sys.stderr);
16 def __init__(self
, _name
, _list
): #{
20 for s
in self
.list: #{
25 def get_list(self
): #{
33 def __init__(self
, _name
): #{
38 def add_gloss(self
, _gloss
): #{
def add_stem(self, _stem, _symlist):
    """Record one (stem, symbol list) pair at the end of ``self.stems``.

    _stem    -- the stem value to store.
    _symlist -- the symbol list that goes with that stem.

    Nothing is returned; ``self.stems`` is modified in place.
    """
    entry = (_stem, _symlist)
    self.stems.append(entry)
47 def get_stems(self
): #{
48 #print >> sys.stderr, self.stems;
59 def __init__(self
, _side
, _language
, _file
, _doc
, _tags
, _templates
): #{
61 self
.language
= _language
;
69 self
.templates
= _templates
;
71 if _side
== 'bidix': #{
72 self
.hashes_left
= {};
73 self
.hashes_right
= {};
77 def get_tags(self
): #{
def get_tag_by_tag(self, _tag):
    """Return the entry stored in ``self.tags`` under the key ``_tag``.

    A missing key is not handled here: whatever ``self.tags`` raises for an
    unknown key propagates to the caller.
    """
    known_tags = self.tags
    return known_tags[_tag]
def get_paradigms(self):
    """Return ``self.paradigms`` itself (not a copy)."""
    all_paradigms = self.paradigms
    return all_paradigms
89 def get_glosses(self
): #{
def get_paradigms_by_tag(self, _tag):
    """Return the entry of ``self.paradigms`` stored under ``_tag``.

    A tag that has no entry is not handled here; the lookup error raised by
    ``self.paradigms`` propagates to the caller.
    """
    by_tag = self.paradigms
    return by_tag[_tag]
97 def get_paradigm(self
, _name
, _tag
): #{
100 paradigm
= self
.paradigms
[_tag
].get(_name
);
102 if paradigm
== None: #{
103 print >> sys
.stderr
, 'We didn`t find the paradigm in the hash';
104 for _paradigm
in self
.paradigms
[_tag
].values(): #{
105 if _paradigm
.name
== _name
: #{
106 paradigm
= _paradigm
;
110 if paradigm
== None: #{
115 # paradigm stems already loaded
116 if len(paradigm
.stems
) > 0: #{
120 print >> sys
.stderr
, 'get_paradigm ' , paradigm
.name
, _name
;
121 path
= ".//pardef[@n='" + _name
+ "']";
122 res
= self
.doc
.xpath(path
)[0];
124 for entrada
in Ft
.Xml
.XPath
.Evaluate('.//e', contextNode
=res
): #{
127 pair
= Ft
.Xml
.XPath
.Evaluate('.//p', contextNode
=entrada
)[0];
128 left
= Ft
.Xml
.XPath
.Evaluate('.//l', contextNode
=pair
)[0].firstChild
;
130 if type(left
) != type(None): #{
131 left
= Ft
.Xml
.XPath
.Evaluate('.//l', contextNode
=pair
)[0].firstChild
.nodeValue
;
134 if type(left
) == type(None): #{
138 right
= Ft
.Xml
.XPath
.Evaluate('.//r', contextNode
=pair
)[0];
140 for symbol
in Ft
.Xml
.XPath
.Evaluate('.//s', contextNode
=right
): #{
142 symlist
= symlist
+ '.' + symbol
.getAttributeNS(None, 'n');
145 symlist
= symlist
+ symbol
.getAttributeNS(None, 'n');
149 print >> sys
.stderr
, 'get_paradigm ' , left
, symlist
;
150 paradigm
.add_stem(left
, symlist
);
156 def hash_paradigm(self
, _paradigm
, _tag
): #{
158 current_paradigm
= _paradigm
.getAttributeNS(None, 'n');
159 current_category
= _tag
;
161 for entrada
in Ft
.Xml
.XPath
.Evaluate('.//e', contextNode
=_paradigm
): #{
162 restriction
= entrada
.getAttributeNS(None, 'r');
163 if type(restriction
) == None: #{
169 for symbol
in Ft
.Xml
.XPath
.Evaluate('.//s', contextNode
=entrada
): #{
170 symbols
= symbols
+ symbol
.getAttributeNS(None, 'n') + '.';
173 paradigm_hash
.append((restriction
, symbols
));
176 m
.update(str(set(paradigm_hash
)));
177 key
= current_category
+ '.' + m
.hexdigest();
179 #print >> sys.stderr, 'generate_hash: ' + current_category + '.' + m.hexdigest() , current_paradigm;
181 return current_category
+ '.' + m
.hexdigest();
184 def set_paradigms_by_tag(self
, _tag
): #{
185 print self
.side
+ ' set_paradigms_by_tag(' + _tag
+ ')';
186 paradigms
= self
.doc
.xpath('//pardef');
187 self
.paradigms
[_tag
] = {};
188 needle
= '.*__' + _tag
+ '$';
189 patron
= re
.compile(needle
);
190 for paradigm
in paradigms
: #{
191 n
= paradigm
.getAttributeNS(None, 'n');
192 if(patron
.match(n
)): #{
194 self
.paradigms
[_tag
][n
] = p
;
195 self
.hashes
[n
.decode('utf-8')] = self
.hash_paradigm(paradigm
, _tag
);
199 print self
.side
+ ' set ' , len(self
.paradigms
[_tag
]) , 'paradigms';
def set_display(self, _tag, _mode):
    """Store the display mode for ``_tag`` in ``self.display``.

    _tag  -- key under which the mode is stored.
    _mode -- mode to store; ``None`` or the empty string selects the
             default mode ``'all'``.

    The two cases are mutually exclusive, so they are handled as a single
    if/else instead of two independently evaluated conditions, and ``None``
    is tested by identity rather than with ``==``.
    """
    if _mode is None or _mode == '':
        self.display[_tag] = 'all'
    else:
        self.display[_tag] = _mode
212 def get_display_by_tag(self
, _tag
): #{
213 if _tag
in self
.display
: #{
214 return self
.display
[_tag
];
220 def get_displays(self
): #{
def add_gloss(self, _tag, _paradigm, _gloss):
    """Store ``_gloss`` in ``self.glosses`` under the key ``_paradigm``.

    ``_tag`` is accepted but not used by this method; an existing gloss for
    the same paradigm key is overwritten.
    """
    gloss_table = self.glosses
    gloss_table[_paradigm] = _gloss
229 def generate_monodix_entrada(self
, _lemma
, _paradigm
, _restriction
, _comment
, _author
): #{
230 incondicional
= self
.incondicional(_lemma
, _paradigm
);
232 print >> sys
.stderr
, 'lemma: ' + _lemma
+ ', paradigm: ' + _paradigm
+ ', comment: ' + _comment
+ ', author: ' + _author
;
235 if _restriction
== "none" or _restriction
== '': #{
236 entrada
= entrada
+ '<e lm="' + _lemma
+ '" a="' + _author
+ '">' + "\n";
238 entrada
= entrada
+ '<e r="' + _restriction
+ '" lm="' + _lemma
+ '" a="' + _author
+ '">' + "\n";
241 entrada
= entrada
+ ' <i>' + incondicional
+ '</i>' + "\n";
242 entrada
= entrada
+ ' <par n="' + _paradigm
+ '"/>' + "\n";
243 entrada
= entrada
+ '</e>';
245 if _comment
!= '': #{
246 entrada
= entrada
+ '<!-- ' + _comment
+ ' -->' + "\n";
249 print >> sys
.stderr
, entrada
;
254 def generate_generic_bidix_entrada(self
, _lemma1
, _lemma2
, _tag
, _restriction
, _comment
, _author
): #{
257 if _restriction
== "none" or _restriction
== '': #{
258 entrada
= entrada
+ '<e a="' + _author
+ '">' + "\n";
261 entrada
= entrada
+ '<e r="' + _restriction
+ '" a="' + _author
+ '">' + "\n";
264 _symbol_list_left
= '<s n="' + _tag
+ '"/>';
265 _symbol_list_right
= '<s n="' + _tag
+ '"/>';
267 entrada
= entrada
+ ' <p>' + "\n";
268 entrada
= entrada
+ ' <l>' + _lemma1
+ _symbol_list_left
+ '</l>' + "\n";
269 entrada
= entrada
+ ' <r>' + _lemma2
+ _symbol_list_right
+ '</r>' + "\n";
270 entrada
= entrada
+ ' </p>' + "\n";
271 entrada
= entrada
+ '</e>' + "\n";
273 if _comment
!= '': #{
274 entrada
= entrada
+ '<!-- ' + _comment
+ ' -->' + "\n";
277 print >> sys
.stderr
, entrada
;
282 def generate_bidix_entrada(self
, _lemma1
, _lemma2
, _paradigm1
, _paradigm2
, _tag
, _restriction
, _comment
, _author
): #{
283 print >> sys
.stderr
, 'generate_bidix_entrada (' + self
.side
+ ')';
285 if _lemma1
== '' or _lemma2
== '' or _paradigm1
== None or _paradigm2
== None: #
289 if _tag
== 'n' or _tag
== 'adj' or _tag
== 'np': #{
290 hash_left
= self
.hashes_left
[_paradigm1
.name
];
291 hash_right
= self
.hashes_right
[_paradigm2
.name
];
292 print >> sys
.stderr
, 'left: ' + hash_left
+ '; right: ' + hash_right
+ '; n. templates: ' + str(len(self
.templates
));
294 if hash_left
not in self
.templates
: #{
295 print >> sys
.stderr
, 'left hash not found in templates: ' + hash_left
;
296 return self
.generate_generic_bidix_entrada(_lemma1
, _lemma2
, _tag
, _restriction
, _comment
, _author
);
299 if hash_right
not in self
.templates
[hash_left
]: #{
300 print >> sys
.stderr
, 'right hash not found in templates[' + hash_left
+ ']: ' + hash_right
;
301 for key
in self
.templates
[hash_left
]: #{
302 print >>sys
.stderr
, '* ' + key
;
304 return self
.generate_generic_bidix_entrada(_lemma1
, _lemma2
, _tag
, _restriction
, _comment
, _author
);
307 entrada
= self
.templates
[hash_left
][hash_right
];
308 entrada
= entrada
.replace('lemma1', _lemma1
).replace('lemma2', _lemma2
);
310 print >> sys
.stderr
, entrada
;
315 return self
.generate_generic_bidix_entrada(_lemma1
, _lemma2
, _tag
, _restriction
, _comment
, _author
);
318 def incondicional(self
, _lemma
, _paradigm
): #{
319 if _paradigm
.count('/') < 1: #{
323 paradigm
= _paradigm
.decode('utf-8');
324 bar_pos
= paradigm
.find('/');
325 und_pos
= paradigm
.find('_');
326 chr_str
= (und_pos
- bar_pos
) - 1;
327 l
= _lemma
.decode('utf-8');
328 r
= l
[0:(len(l
) - chr_str
)];
330 return r
.encode('utf-8');
def append(self, _entrada):
    """Append the entry ``_entrada`` to each section of ``self.doc`` whose id is 'main'.

    _entrada -- XML text of the entry; it is encoded as UTF-8, parsed with
                NonvalidatingReader, and the first ``e`` element found in the
                parsed fragment is appended as a child of the section.

    Progress is traced on sys.stderr. The fragment is parsed again for every
    matching section. A fragment without any ``e`` element makes the ``[0]``
    lookup fail (presumably IndexError -- depends on what xpath() returns).
    """
    print >> sys.stderr, '> ', self.file
    print >> sys.stderr, self.side + ' append('
    print >> sys.stderr, _entrada
    print >> sys.stderr, ')'

    for section in self.doc.xpath('.//section'):
        print >> sys.stderr, '+ section : ' + section.getAttributeNS(None, 'id')
        if section.getAttributeNS(None, 'id') == 'main':
            print >> sys.stderr, 'Appending to section....'
            encoded_entry = _entrada.encode('utf-8')
            fragment = NonvalidatingReader.parseString(encoded_entry, 'urn:bogus:dummy')
            new_entry = fragment.xpath('.//e')[0]
            section.appendChild(new_entry)
            print >> sys.stderr, 'Appended.'
353 print >> sys
.stderr
, 'Writing out DOM to ' + self
.file;
354 f
= open(self
.file, 'w');
355 Print(self
.doc
, stream
=f
);
357 print >> sys
.stderr
, 'Written.';
365 def __init__(self
, _working
, _name
, _parent
): #{
366 self
.working
= _working
;
368 self
.parent
= _parent
;
369 self
.dictionary
= {};
377 def populate(self
): #{
378 print self
.name
+ ' populate()';
379 for dictionary
in self
.parent
.xpath('.//dictionary'): #{
380 current_dict
= dictionary
.getAttributeNS(None, 'n');
381 side
= dictionary
.getAttributeNS(None, 'side');
382 filename
= dictionary
.getAttributeNS(None, 'file');
383 filename
= self
.working
+ '/cache/' + self
.name
+ '/' + filename
;
385 print ' % (' + current_dict
+ ') ' + side
+ ', ' + filename
;
386 doc
= NonvalidatingReader
.parseUri('file:///' + filename
);
387 self
.dictionary
[side
] = Dictionary(side
, current_dict
, filename
, doc
, self
.tags
, self
.templates
);
389 self
.dictionary
['bidix'].hashes_left
= self
.dictionary
['left'].hashes
;
390 self
.dictionary
['bidix'].hashes_right
= self
.dictionary
['right'].hashes
;
def dictionary(self, _side):
    """Return the dictionary stored for ``_side`` (e.g. 'left', 'bidix', 'right').

    The lookup reads ``self.dictionary``, the mapping that the rest of this
    class fills and reads; the previous code read ``self.dictionaries``,
    which no visible code assigns, so the call raised AttributeError.

    NOTE(review): the instance attribute ``self.dictionary`` has the same name
    as this method and therefore hides it on instances; callers appear to
    index ``self.dictionary[...]`` directly. Consider renaming one of the two.

    Raises KeyError if nothing was stored for ``_side``.
    """
    return self.dictionary[_side]
def set_templates(self, _templates):
    """Install ``_templates`` on this object and on its three dictionaries.

    _templates -- sized collection of templates; its length is reported on
                  sys.stderr before it is stored.

    The same object (not a copy) is assigned to ``self.templates`` and to the
    ``templates`` attribute of the 'left', 'bidix' and 'right' entries of
    ``self.dictionary``, in that order; a missing entry raises KeyError.
    """
    message = 'Loaded ' + str(len(_templates)) + ' templates'
    sys.stderr.write(message + '\n')
    self.templates = _templates

    for side in ('left', 'bidix', 'right'):
        self.dictionary[side].templates = _templates
406 def add_tag(self
, _name
, _list
): #{
407 print 'add_tag(' + _name
+ ')';
408 self
.tags
[_name
] = Tag(_name
, _list
);
409 self
.dictionary
['left'].set_paradigms_by_tag(_name
);
410 self
.dictionary
['right'].set_paradigms_by_tag(_name
);
413 def get_tags(self
): #{
418 print >> sys
.stderr
, self
.name
+ ' commit()';
420 self
.dictionary
['left'].commit();
421 self
.dictionary
['bidix'].commit();
422 self
.dictionary
['right'].commit();