Adding a bit
[apertium.git] / apertium-dictionary-form / pair.php
blob2f2fc50d3fac490458db4c53bebc59676c451d7b
1 <?
/**
 * A named paradigm together with the stems collected for it.
 *
 * $stems maps a stem string to a one-element array holding the symbol
 * list (symbol names joined with '.') that was last recorded for it.
 */
class Paradigm {
    public $name;
    public $stems;

    /**
     * Uses __construct rather than a method named after the class:
     * PHP 8 no longer treats the latter as a constructor, which would
     * leave $name and $stems unset after `new Paradigm(...)`.
     *
     * @param string $_name Paradigm name.
     */
    function __construct($_name) {
        $this->name = $_name;
        $this->stems = array();
    }

    /**
     * Records the symbol list for a stem, replacing anything stored for
     * that stem before (so repeated loading does not accumulate entries).
     *
     * @param string $_stem    Stem text used as the array key.
     * @param string $_symbols Symbol list for that stem.
     */
    function add_stem($_stem, $_symbols) {
        $this->stems[$_stem] = array($_symbols);
    }

    /**
     * @return array The stem => array(symbol list) map.
     */
    function stems() {
        return $this->stems;
    }
}
/**
 * One dictionary file of a language pair: its parsed DOM document, the
 * paradigms found for each configured tag, and helpers for generating
 * and committing new entries.
 */
class Dictionary {
    public $language;
    public $file;
    public $doc;
    public $paradigms;
    public $depth;
    public $glosses;
    public $display_modes;

    /**
     * Uses __construct rather than a method named after the class, which
     * PHP 8 no longer treats as a constructor.
     *
     * For every tag the paradigm names are collected by grepping the raw
     * dictionary file(s) matching the glob "*" "/" "*" "/" $_file relative
     * to the current working directory. A DOM walk over the <pardef>
     * elements (matching the 'n' attribute against "__<tag>$" and reading
     * the stems immediately) would be more correct, but the grep is
     * substantially faster; stems are loaded lazily by get_paradigm().
     *
     * @param string      $_language Display name of the language.
     * @param string      $_file     Dictionary file name.
     * @param DOMDocument $_doc      Parsed dictionary document.
     * @param array|null  $_tags     Tags to collect paradigms for.
     * @param mixed       $_glosses  Glosses, stored as given.
     */
    function __construct($_language, $_file, $_doc, $_tags, $_glosses) {
        $this->language = $_language;
        $this->file = $_file;
        $this->doc = $_doc;
        $this->glosses = $_glosses;
        $this->paradigms = array();

        // A pair without any registered tag passes null here.
        if(!is_array($_tags) && !($_tags instanceof Traversable)) {
            return;
        }

        foreach($_tags as $tag) {
            $this->paradigms[$tag] = array();

            foreach($this->paradigm_names_for_tag($tag) as $paradigm_name) {
                array_push($this->paradigms[$tag], new Paradigm($paradigm_name));
            }
        }
    }

    /**
     * Greps the dictionary file(s) for pardef lines whose name ends in
     * "__<tag>" and returns the paradigm names found.
     *
     * The file name and the grep pattern are passed through
     * escapeshellarg() so that they cannot alter the shell command.
     *
     * @param string $_tag
     * @return array List of non-empty paradigm names.
     */
    private function paradigm_names_for_tag($_tag) {
        $cmd = 'cat */*/' . escapeshellarg($this->file)
            . ' | grep "pardef"'
            . ' | grep -e ' . escapeshellarg('__' . $_tag . '"')
            . ' | cut -f2 -d='
            . ' | sed "s/<!--.*-->//g" '
            . '| sed "s/<e>//g"'; // hack for elements on the same line as pardef (mal)

        // shell_exec() yields null when the command fails or prints nothing.
        $res = (string) shell_exec($cmd);
        $res = str_replace(array('"', '>'), '', $res);

        $names = array();
        foreach(explode("\n", $res) as $paradigm_name) {
            if($paradigm_name != "") {
                $names[] = $paradigm_name;
            }
        }

        return $names;
    }

    function set_display_mode($_modes) {
        $this->display_modes = $_modes;
    }

    /**
     * @return mixed The display mode stored for the tag, or null if none.
     */
    function get_display_mode($_tag) {
        return isset($this->display_modes[$_tag]) ? $this->display_modes[$_tag] : null;
    }

    /**
     * @return array|null The Paradigm objects collected for the tag.
     */
    function paradigms($_tag) {
        return isset($this->paradigms[$_tag]) ? $this->paradigms[$_tag] : null;
    }

    /**
     * Finds the paradigm with the given name under the given tag, loads
     * its stems from the matching <pardef> in the document and returns it.
     *
     * Entries without a <p>, <l> or <r> element are skipped instead of
     * causing a call on null. If no paradigm matches, a message is
     * printed and nothing is returned.
     *
     * (Returning the Paradigm without touching the DOM would only be
     * enough if the constructor had loaded the stems up front.)
     *
     * @param string $_name Paradigm name.
     * @param string $_tag  Tag the paradigm was collected under.
     * @return Paradigm|null
     */
    function get_paradigm($_name, $_tag) {
        $candidates = isset($this->paradigms[$_tag]) ? $this->paradigms[$_tag] : array();

        foreach($candidates as $paradigm) {
            if((string) $paradigm->name !== (string) $_name) {
                continue;
            }

            $xpath = new DOMXPath($this->doc);
            $res = $xpath->evaluate('//pardef[@n=' . $this->xpath_literal($_name) . ']');
            $par = ($res instanceof DOMNodeList) ? $res->item(0) : null;

            if($par !== null) {
                foreach($par->getElementsByTagName('e') as $entrada) {
                    $stem = $this->read_stem($entrada);
                    if($stem !== null) {
                        $paradigm->add_stem($stem[0], $stem[1]);
                    }
                }
            }

            return $paradigm;
        }

        print "Couldn't find paradigm";
    }

    /**
     * Reads the left-side text and the '.'-joined symbol names of the
     * right side from one <e> element.
     *
     * @param DOMElement $_entrada
     * @return array|null array(left text, symbol list), or null when the
     *                    entry has no <p>/<l>/<r> structure.
     */
    private function read_stem($_entrada) {
        $pair = $_entrada->getElementsByTagName('p')->item(0);
        if($pair === null) {
            return null;
        }

        $left = $pair->getElementsByTagName('l')->item(0);
        $right = $pair->getElementsByTagName('r')->item(0);
        if($left === null || $right === null) {
            return null;
        }

        $names = array();
        foreach($right->getElementsByTagName('s') as $symbol) {
            $names[] = $symbol->getAttribute('n');
        }

        return array($left->nodeValue, implode('.', $names));
    }

    /**
     * Quotes a value as an XPath 1.0 string literal, falling back to
     * concat() when the value contains both kinds of quote.
     */
    private function xpath_literal($_value) {
        $_value = (string) $_value;

        if(strpos($_value, "'") === false) {
            return "'" . $_value . "'";
        }
        if(strpos($_value, '"') === false) {
            return '"' . $_value . '"';
        }

        return "concat('" . str_replace("'", "', \"'\", '", $_value) . "')";
    }

    /**
     * Reports whether the quoted lemma ("lemma") occurs anywhere in the
     * dictionary file(s) matching cache/<any dir>/<file>, relative to the
     * current working directory.
     *
     * The search is done in PHP with a literal comparison, so the lemma
     * is never handed to a shell and regular-expression metacharacters in
     * it are not interpreted. A DOM walk comparing each entry's 'lm'
     * attribute and paradigm tag would be stricter but slower.
     *
     * @param string $_lemma
     * @param string $_tag   Not used by this implementation.
     * @return bool
     */
    function lemma_exists($_lemma, $_tag) {
        $needle = '"' . $_lemma . '"';

        $paths = glob('cache/*/' . $this->file);
        if(!is_array($paths)) {
            return FALSE;
        }

        foreach($paths as $path) {
            $contents = file_get_contents($path);
            if($contents !== false && strpos($contents, $needle) !== false) {
                return TRUE;
            }
        }

        return FALSE;
    }

    /**
     * Returns the part of the lemma that stays fixed under the paradigm.
     *
     * For a paradigm name of the form "stem/suffix_..." the characters
     * between '/' and the first '_' are taken as the suffix and that many
     * bytes are cut from the end of the lemma. Names without '/' leave
     * the lemma unchanged.
     */
    function incondicional($_lemma, $_paradigm) {
        $bar_pos = strpos($_paradigm, '/');
        if($bar_pos === false) {
            return $_lemma;
        }

        $und_pos = (int) strpos($_paradigm, '_');
        // und - bar is the suffix length plus one (it includes the '/').
        $keep = strlen($_lemma) - ($und_pos - $bar_pos) + 1;

        return substr($_lemma, 0, $keep);
    }

    /**
     * Builds a monolingual entry of the form
     * <e lm="lemma"><i>lemm</i><par n="paradigm"/></e>
     * followed by an XML comment when $_comment is not empty.
     *
     * NOTE(review): the values are inserted verbatim, without XML
     * escaping; confirm that callers only pass XML-safe text.
     */
    function generate_monodix_entrada($_lemma, $_paradigm, $_comment, $_author) {
        $entrada = '<e lm="' . $_lemma . '" a="' . $_author . '">' . "\n" .
                   ' <i>' . $this->incondicional($_lemma, $_paradigm) . '</i>' . "\n" .
                   ' <par n="' . $_paradigm . '"/>' . "\n" .
                   '</e>';

        if($_comment != "") {
            $entrada .= '<!-- ' . $_comment . ' -->' . "\n";
        }

        return $entrada;
    }

    /**
     * Builds a bilingual entry of the form
     * <e><p><l>lemma1<s n="tag"/></l><r>lemma2<s n="tag"/></r></p></e>.
     *
     * With the restriction "none" the entry is laid out over several
     * lines and an optional comment is appended; any other restriction
     * produces a single-line entry with an r="..." attribute and the
     * comment is not used.
     *
     * NOTE(review): the values are inserted verbatim, without XML
     * escaping; confirm that callers only pass XML-safe text.
     */
    function generate_bidix_entrada($_lemma1, $_lemma2, $_tag, $_restriction, $_comment, $_author) {
        $left = '<l>' . $_lemma1 . '<s n="' . $_tag . '"/></l>';
        $right = '<r>' . $_lemma2 . '<s n="' . $_tag . '"/></r>';

        if($_restriction == "none") {
            $entrada = '<e a="' . $_author . '">' . "\n" .
                       ' <p>' . "\n" .
                       ' ' . $left . "\n" .
                       ' ' . $right . "\n" .
                       ' </p>' . "\n" .
                       '</e>' . "\n";

            if($_comment != "") {
                $entrada .= '<!-- ' . $_comment . ' -->' . "\n";
            }

            return $entrada;
        }

        return '<e r="' . $_restriction . '" a="' . $_author . '"><p>' . $left . $right . '</p></e>' . "\n";
    }

    /**
     * Fills a template file: the placeholders entry_author,
     * entry_lemma_left and entry_lemma_right are replaced in one pass, so
     * a substituted value is never itself scanned for placeholders.
     * $_comment is not used.
     *
     * @return string The filled template, or '' if it cannot be read.
     */
    function generate_bidix_entrada_from_template($_dir, $_template, $_lemma1, $_lemma2, $_comment, $_author) {
        $text = file_get_contents($_dir . $_template);
        if($text === false) {
            return '';
        }

        return strtr($text, array(
            'entry_author' => $_author,
            'entry_lemma_left' => $_lemma1,
            'entry_lemma_right' => $_lemma2
        ));
    }

    /**
     * Appends the XML in $e to the <section id="main"> of the document
     * and saves the document to $cache_dir/<file>.
     *
     * Nothing is saved when $e is empty or is not well-formed XML, or
     * when the document has no main section.
     */
    function commit($cache_dir, $e) {
        $doc = $this->doc;
        $filepath = $cache_dir . '/' . $this->file;

        $insertion_point = null;
        foreach($doc->getElementsByTagName('section') as $section) {
            if($section->getAttribute("id") == "main") {
                $insertion_point = $section;
            }
        }

        if($insertion_point === null) {
            print "Couldn't find the main section";
            return;
        }

        if(trim((string) $e) === '') {
            return;
        }

        $frag = $doc->createDocumentFragment();
        // appendXML() returns false for malformed input; appending the
        // then-empty fragment would fail and the file would be rewritten
        // without the new entry.
        if(!$frag->appendXML($e)) {
            print "Couldn't parse the entry";
            return;
        }

        $insertion_point->appendChild($frag);
        $doc->save($filepath);
    }

    function glosses() {
        return $this->glosses;
    }
}
/**
 * This class represents a language pair, with the left dictionary,
 * right dictionary and bilingual dictionary (bidix).
 *
 * Wherever a side is named, both the short form ('l', 'r') and the long
 * form ('left', 'right') are accepted, as is 'bilingual'.
 */
class Pair {

    public $name = '';
    public $tags;
    public $show;
    public $left;
    public $right;
    public $bidix;
    public $templates_left;
    public $templates_right;
    public $parent;
    public $wd;
    public $cachedir;
    public $templatedir;

    public $glosses_left;
    public $glosses_right;
    public $display_left;
    public $display_right;

    /**
     * Uses __construct rather than a method named after the class, which
     * PHP 8 no longer treats as a constructor.
     *
     * @param string     $_wd     Working directory; templates are looked
     *                            up under "<wd>/templates/".
     * @param string     $_name   Name of the pair.
     * @param DOMElement $_parent Element whose <dictionary> descendants
     *                            describe the pair's dictionaries.
     */
    function __construct($_wd, $_name, $_parent) {
        $this->name = $_name;
        $this->parent = $_parent;
        $this->wd = $_wd;
    }

    /**
     * Maps a side identifier to its canonical form.
     *
     * @return string|null 'l', 'r' or 'bilingual'; null if unrecognised.
     */
    private function side_code($_side) {
        $side = (string) $_side;

        if($side === 'l' || $side === 'left') {
            return 'l';
        }
        if($side === 'r' || $side === 'right') {
            return 'r';
        }
        if($side === 'bilingual') {
            return 'bilingual';
        }

        return null;
    }

    /**
     * Loads the dictionaries described by the <dictionary> elements below
     * $parent, e.g.
     * <dictionary n="Spanish" side="left" format="dix" file="apertium-es-gl.es.dix" />
     *
     * Each file is read from $cachedir . <file>; $cachedir is not set by
     * this class and is used as-is, without adding a separator. Elements
     * with an unrecognised side are ignored.
     */
    function populate() {
        $this->templatedir = $this->wd . '/templates/';

        $cachedir = $this->cachedir;

        foreach($this->parent->getElementsByTagName('dictionary') as $dict) {
            $side = $this->side_code($dict->getAttribute('side'));
            if($side === null) {
                continue;
            }

            $current = $dict->getAttribute('n');
            $filename = $dict->getAttribute('file');

            $doc = new DOMDocument;
            $doc->load($cachedir . $filename);

            if($side === 'l') {
                $this->left = new Dictionary($current, $filename, $doc, $this->tags, $this->glosses_left);
                $this->left->set_display_mode($this->display_left);
            } else if($side === 'bilingual') {
                $this->bidix = new Dictionary($current, $filename, $doc, $this->tags, "");
            } else {
                $this->right = new Dictionary($current, $filename, $doc, $this->tags, $this->glosses_right);
                $this->right->set_display_mode($this->display_right);
            }
        }
    }

    function template_dir() {
        return $this->templatedir;
    }

    function tags() {
        return $this->tags;
    }

    /**
     * @return mixed The list registered with add_tag(), or null if the
     *               tag is unknown.
     */
    function shows($_tag) {
        return isset($this->show[$_tag]) ? $this->show[$_tag] : null;
    }

    /**
     * Registers a tag together with the list returned by shows().
     */
    function add_tag($_tag, $_list) {
        $this->tags[$_tag] = $_tag;
        $this->show[$_tag] = $_list;
    }

    /**
     * Stores the template for a tag on the given side.
     *
     * @return bool Always FALSE, also after a successful store; kept as
     *              is so existing callers see no change.
     */
    function add_template($_template, $_tag, $_side) {
        $side = $this->side_code($_side);

        if($side === 'l') {
            $this->templates_left[$_tag] = $_template;
        } else if($side === 'r') {
            $this->templates_right[$_tag] = $_template;
        }

        return FALSE;
    }

    /**
     * Stores the gloss for a paradigm on the given side.
     *
     * @return bool Always FALSE, also after a successful store; kept as
     *              is so existing callers see no change.
     */
    function add_gloss($_paradigm, $_gloss, $_side) {
        $side = $this->side_code($_side);

        if($side === 'l') {
            $this->glosses_left[$_paradigm] = $_gloss;
        } else if($side === 'r') {
            $this->glosses_right[$_paradigm] = $_gloss;
        }

        return FALSE;
    }

    /**
     * Stores the display mode for a tag on the given side.
     *
     * @return bool Always FALSE, also after a successful store; kept as
     *              is so existing callers see no change.
     */
    function set_display_mode($_tag, $_mode, $_side) {
        $side = $this->side_code($_side);

        if($side === 'l') {
            $this->display_left[$_tag] = $_mode;
        } else if($side === 'r') {
            $this->display_right[$_tag] = $_mode;
        }

        return FALSE;
    }

    /**
     * @return mixed The template stored for the tag on that side (null if
     *               none), or FALSE for a side other than left/right.
     */
    function template($_tag, $_side) {
        $side = $this->side_code($_side);

        if($side === 'l') {
            return isset($this->templates_left[$_tag]) ? $this->templates_left[$_tag] : null;
        } else if($side === 'r') {
            return isset($this->templates_right[$_tag]) ? $this->templates_right[$_tag] : null;
        }

        return FALSE;
    }

    /**
     * @return mixed All templates of that side, or FALSE for a side other
     *               than left/right.
     */
    function templates($_side) {
        $side = $this->side_code($_side);

        if($side === 'l') {
            return $this->templates_left;
        } else if($side === 'r') {
            return $this->templates_right;
        }

        return FALSE;
    }

    /**
     * @return Dictionary|null The dictionary of that side; null if the
     *                         side is unrecognised or not loaded.
     */
    function dictionary($_side) {
        $side = $this->side_code($_side);

        if($side === 'l') {
            return $this->left;
        } else if($side === 'bilingual') {
            return $this->bidix;
        } else if($side === 'r') {
            return $this->right;
        }
    }

    /**
     * Commits one entry to each of the three dictionaries, in the order
     * left, right, bilingual. All three must have been loaded.
     */
    function commit($e_left, $e_bidix, $e_right) {
        $this->left->commit($this->cachedir, $e_left);
        $this->right->commit($this->cachedir, $e_right);
        $this->bidix->commit($this->cachedir, $e_bidix);
    }
}