3 # -*- encoding: utf-8 -*-
5 import sys
, string
, codecs
, xml
, re
, Ft
;
6 from Ft
.Xml
.Domlette
import NonvalidatingReader
;
7 from Ft
.Xml
.Domlette
import Print
, PrettyPrint
;
8 from Ft
.Xml
.XPath
import Evaluate
;
10 sys
.stdout
= codecs
.getwriter('utf-8')(sys
.stdout
);
11 sys
.stderr
= codecs
.getwriter('utf-8')(sys
.stderr
);
16 def __init__(self
, _name
, _list
): #{
20 for s
in self
.list: #{
25 def get_list(self
): #{
33 def __init__(self
, _name
): #{
38 def add_gloss(self
, _gloss
): #{
42 def add_stem(self
, _stem
, _symlist
): #{
43 #print >> sys.stderr, 'add_stem(' + _stem + ', ' + _symlist + ')';
44 self
.stems
.append((_stem
, _symlist
));
47 def get_stems(self
): #{
48 #print >> sys.stderr, self.stems;
59 def __init__(self
, _side
, _language
, _file
, _doc
, _tags
): #{
61 self
.language
= _language
;
70 def get_tags(self
): #{
74 def get_tag_by_tag(self
, _tag
): #{
75 return self
.tags
[_tag
];
78 def get_paradigms(self
): #{
79 return self
.paradigms
;
82 def get_glosses(self
): #{
86 def get_paradigms_by_tag(self
, _tag
): #{
87 return self
.paradigms
[_tag
];
90 def get_paradigm(self
, _name
, _tag
): #{
93 paradigm
= self
.paradigms
[_tag
].get(_name
);
95 if paradigm
== None: #{
96 print >> sys
.stderr
, 'We didn`t find the paradigm in the hash';
97 for _paradigm
in self
.paradigms
[_tag
].values(): #{
98 if _paradigm
.name
== _name
: #{
103 if paradigm
== None: #{
108 # paradigm stems already loaded
109 if len(paradigm
.stems
) > 0: #{
113 print >> sys
.stderr
, 'get_paradigm ' , paradigm
.name
, _name
;
114 path
= ".//pardef[@n='" + _name
+ "']";
115 res
= self
.doc
.xpath(path
)[0];
117 for entrada
in Ft
.Xml
.XPath
.Evaluate('.//e', contextNode
=res
): #{
120 pair
= Ft
.Xml
.XPath
.Evaluate('.//p', contextNode
=entrada
)[0];
121 left
= Ft
.Xml
.XPath
.Evaluate('.//l', contextNode
=pair
)[0].firstChild
;
123 if type(left
) != type(None): #{
124 left
= Ft
.Xml
.XPath
.Evaluate('.//l', contextNode
=pair
)[0].firstChild
.nodeValue
;
127 if type(left
) == type(None): #{
131 right
= Ft
.Xml
.XPath
.Evaluate('.//r', contextNode
=pair
)[0];
133 for symbol
in Ft
.Xml
.XPath
.Evaluate('.//s', contextNode
=right
): #{
135 symlist
= symlist
+ '.' + symbol
.getAttributeNS(None, 'n');
138 symlist
= symlist
+ symbol
.getAttributeNS(None, 'n');
142 print >> sys
.stderr
, 'get_paradigm ' , left
, symlist
;
143 paradigm
.add_stem(left
, symlist
);
149 def set_paradigms_by_tag(self
, _tag
): #{
150 print self
.side
+ ' set_paradigms_by_tag(' + _tag
+ ')';
151 paradigms
= self
.doc
.xpath('//pardef');
152 self
.paradigms
[_tag
] = {};
153 needle
= '.*__' + _tag
+ '$';
154 patron
= re
.compile(needle
);
155 for paradigm
in paradigms
: #{
156 n
= paradigm
.getAttributeNS(None, 'n');
157 if(patron
.match(n
)): #{
159 self
.paradigms
[_tag
][n
] = p
;
163 print self
.side
+ ' set ' , len(self
.paradigms
[_tag
]) , 'paradigms';
166 def set_display(self
, _tag
, _mode
): #{
167 if _mode
== None or _mode
== '': #{
168 self
.display
[_tag
] = 'all';
171 if _mode
!= None and _mode
!= '': #{
172 self
.display
[_tag
] = _mode
;
176 def get_display_by_tag(self
, _tag
): #{
177 if _tag
in self
.display
: #{
178 return self
.display
[_tag
];
184 def get_displays(self
): #{
189 def add_gloss(self
, _paradigm
, _gloss
): #{
190 self
.glosses
[_paradigm
] = _gloss
;
193 def generate_monodix_entrada(self
, _lemma
, _paradigm
, _restriction
, _comment
, _author
): #{
194 incondicional
= self
.incondicional(_lemma
, _paradigm
);
196 print >> sys
.stderr
, 'lemma: ' + _lemma
+ ', paradigm: ' + _paradigm
+ ', comment: ' + _comment
+ ', author: ' + _author
;
199 if _restriction
== "none" or _restriction
== '': #{
200 entrada
= entrada
+ '<e lm="' + _lemma
+ '" a="' + _author
+ '">' + "\n";
202 entrada
= entrada
+ '<e r="' + _restriction
+ '" lm="' + _lemma
+ '" a="' + _author
+ '">' + "\n";
205 entrada
= entrada
+ ' <i>' + incondicional
+ '</i>' + "\n";
206 entrada
= entrada
+ ' <par n="' + _paradigm
+ '"/>' + "\n";
207 entrada
= entrada
+ '</e>';
209 if _comment
!= '': #{
210 entrada
= entrada
+ '<!-- ' + _comment
+ ' -->' + "\n";
213 print >> sys
.stderr
, entrada
;
218 def generate_bidix_entrada(self
, _lemma1
, _lemma2
, _paradigm1
, _paradigm2
, _tag
, _restriction
, _comment
, _author
): #{
219 if _lemma1
== '' or _lemma2
== '' or _paradigm1
== None or _paradigm2
== None: #
223 _symbol_list_left
= '';
224 _symbol_list_right
= '';
228 stems_left
= _paradigm1
.get_stems();
229 stems_right
= _paradigm2
.get_stems();
231 if len(stems_left
) == len(stems_right
): #{
233 for stem
in stems_left
: #{
234 tags_left
= tags_left |
set(stem
[1].split('.'));
238 for stem
in stems_right
: #{
239 tags_right
= tags_right |
set(stem
[1].split('.'));
242 print >> sys
.stderr
, 'tags_left:' , tags_left
;
243 print >> sys
.stderr
, 'tags_right:' , tags_right
;
244 print >> sys
.stderr
, 'symdiff:' , tags_left ^ tags_right
;
247 _symbol_list_left
= '<s n="n"/>';
248 _symbol_list_right
= '<s n="n"/>';
251 if _tag
== 'vblex': #{
252 _symbol_list_left
= '<s n="vblex"/>';
253 _symbol_list_right
= '<s n="vblex"/>';
257 _symbol_list_left
= '<s n="adj"/>';
258 _symbol_list_right
= '<s n="adj"/>';
262 _symbol_list_left
= '<s n="adv"/>';
263 _symbol_list_right
= '<s n="adv"/>';
268 if _restriction
== "none" or _restriction
== '': #{
269 entrada
= entrada
+ '<e a="' + _author
+ '">' + "\n";
272 entrada
= entrada
+ '<e r="' + _restriction
+ '" a="' + _author
+ '">' + "\n";
275 entrada
= entrada
+ ' <p>' + "\n";
276 entrada
= entrada
+ ' <l>' + _lemma1
+ _symbol_list_left
+ '</l>' + "\n";
277 entrada
= entrada
+ ' <r>' + _lemma2
+ _symbol_list_right
+ '</r>' + "\n";
278 entrada
= entrada
+ ' </p>' + "\n";
279 entrada
= entrada
+ '</e>' + "\n";
281 if _comment
!= '': #{
282 entrada
= entrada
+ '<!-- ' + _comment
+ ' -->' + "\n";
285 print >> sys
.stderr
, entrada
;
290 def incondicional(self
, _lemma
, _paradigm
): #{
291 if _paradigm
.count('/') < 1: #{
295 paradigm
= _paradigm
.decode('utf-8');
296 bar_pos
= paradigm
.find('/');
297 und_pos
= paradigm
.find('_');
298 chr_str
= (und_pos
- bar_pos
) - 1;
299 l
= _lemma
.decode('utf-8');
300 r
= l
[0:(len(l
) - chr_str
)];
302 return r
.encode('utf-8');
305 def append(self
, _entrada
): #{
306 print >> sys
.stderr
, '> ' , self
.file;
307 print >> sys
.stderr
, self
.side
+ ' append(';
308 print >> sys
.stderr
, _entrada
;
309 print >> sys
.stderr
, ')';
311 for section
in self
.doc
.xpath('.//section'): #{
312 print >> sys
.stderr
, '+ section : ' + section
.getAttributeNS(None, 'id');
313 if section
.getAttributeNS(None, 'id') == 'main': #{
314 print >> sys
.stderr
, 'Appending to section....';
315 insertion_point
= section
;
316 child_doc
= NonvalidatingReader
.parseString(_entrada
.encode('utf-8'), 'urn:bogus:dummy');
317 child_node
= child_doc
.xpath('.//e')[0];
318 insertion_point
.appendChild(child_node
);
319 print >> sys
.stderr
, 'Appended.';
325 print >> sys
.stderr
, 'Writing out DOM to ' + self
.file;
326 f
= open(self
.file, 'w');
327 Print(self
.doc
, stream
=f
);
329 print >> sys
.stderr
, 'Written.';
337 def __init__(self
, _working
, _name
, _parent
): #{
338 self
.working
= _working
;
340 self
.parent
= _parent
;
341 self
.dictionary
= {};
348 def populate(self
): #{
349 print self
.name
+ ' populate()';
350 for dictionary
in self
.parent
.xpath('.//dictionary'): #{
351 current_dict
= dictionary
.getAttributeNS(None, 'n');
352 side
= dictionary
.getAttributeNS(None, 'side');
353 filename
= dictionary
.getAttributeNS(None, 'file');
354 filename
= self
.working
+ '/cache/' + self
.name
+ '/' + filename
;
356 print ' % (' + current_dict
+ ') ' + side
+ ', ' + filename
;
357 doc
= NonvalidatingReader
.parseUri('file:///' + filename
);
358 self
.dictionary
[side
] = Dictionary(side
, current_dict
, filename
, doc
, self
.tags
);
363 def dictionary(self
, _side
): #{
364 return self
.dictionaries
[_side
];
367 def add_tag(self
, _name
, _list
): #{
368 print 'add_tag(' + _name
+ ')';
369 self
.tags
[_name
] = Tag(_name
, _list
);
370 self
.dictionary
['left'].set_paradigms_by_tag(_name
);
371 self
.dictionary
['right'].set_paradigms_by_tag(_name
);
374 def get_tags(self
): #{
379 print >> sys
.stderr
, self
.name
+ ' commit()';
381 self
.dictionary
['left'].commit();
382 self
.dictionary
['bidix'].commit();
383 self
.dictionary
['right'].commit();