2 * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
19 #include <apertium-multiple-translations/TransferMult.H>
20 #include <apertium/TRXReader.H>
21 #include <lttoolbox/Compression.H>
22 #include <lttoolbox/XMLParseUtil.H>
// NOTE(review): only the signature is visible in this excerpt. Presumably
// copies the state of `o` into this object — confirm against the full body.
31 TransferMult::copy(TransferMult const &o)
// NOTE(review): only the signature is visible in this excerpt. Presumably
// releases what this object owns (readData allocates `me` with `new`) —
// confirm against the full body.
36 TransferMult::destroy()
// Default constructor. NOTE(review): the initialiser list and body are not
// visible in this excerpt.
45 TransferMult::TransferMult()
// Destructor. NOTE(review): body not visible in this excerpt; presumably
// delegates to destroy() — confirm.
53 TransferMult::~TransferMult()
// Copy constructor taking the source object `o`. NOTE(review): body not
// visible in this excerpt; presumably delegates to copy(o) — confirm.
58 TransferMult::TransferMult(TransferMult const &o)
// Copy assignment from `o`. NOTE(review): body not visible in this excerpt;
// presumably guards against self-assignment, then destroy() + copy(o) — confirm.
64 TransferMult::operator =(TransferMult const &o)
// Lower-cases text one element at a time with the C library `::tolower`.
// NOTE(review): the declaration of `result` and the return statement are not
// visible in this excerpt; presumably `result` starts as a copy of `str` — confirm.
75 TransferMult::tolower(string const &str) const
// `limit` is evaluated once, so the size is not re-read on each iteration.
78 for(unsigned int i = 0, limit = str.size(); i != limit; i++)
// NOTE(review): if `result[i]` is a plain `char`, bytes above 0x7F may be
// negative, and `::tolower` expects an unsigned-char value or EOF — consider
// casting through `unsigned char`.
80 result[i] = ::tolower(result[i]);
// Loads transfer data from the already-open stream `in`.
// Visible steps, in stream order: look up the ANY_CHAR / ANY_TAG symbols,
// read the table of final states, build the MatchExe matcher, install the
// built-in attribute patterns, then read four count-prefixed tables
// (attr_items, variables, macros, lists). The order of the reads is the
// on-disk layout, so statements must not be reordered.
// NOTE(review): several original lines are missing from this excerpt (e.g.
// where `alphabet` and `t` are read, and whatever follows each fread).
88 TransferMult::readData(FILE *in)
// Symbol codes used later by applyWord as the fallback arguments to ms.step().
91 any_char = alphabet(TRXReader::ANY_CHAR);
92 any_tag = alphabet(TRXReader::ANY_TAG);
// The entry count is read once in the loop initialiser; each entry is a
// (key, value) pair of integers stored into `finals`.
100 for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
102 int key = Compression::multibyte_read(in);
103 finals[key] = Compression::multibyte_read(in);
// Raw owning pointer allocated with `new`; ownership/release is not visible here.
106 me = new MatchExe(t, finals);
// Built-in attribute patterns, installed before the file-provided ones so an
// entry read below with the same key overwrites them.
111 attr_items["lem"] = "(([^<]|\"\\<\")+)";
112 attr_items["lemq"] = "(#[ _][^<]+)";
113 attr_items["lemh"] = "(([^<#]|\"\\<\"|\"\\#\")+)";
114 attr_items["whole"] = ".+";
115 attr_items["tags"] = "((<[^>]+>)+)";
// attr_items table: each entry is a length-prefixed key followed by a
// length-prefixed value.
117 for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
119 int size_k = Compression::multibyte_read(in);
// Variable-length array: a compiler extension, not standard C++. The extra
// byte leaves room for a terminator.
// NOTE(review): the terminator is presumably written on a line not visible
// here — confirm, since `cad_k` is later used as a C string.
120 char cad_k[size_k+1];
// NOTE(review): the fread return value is not checked in the visible code, so
// a truncated file would go unnoticed here.
121 fread(cad_k, sizeof(char), size_k, in);
124 int size_v = Compression::multibyte_read(in);
125 char cad_v[size_v+1];
126 fread(cad_v, sizeof(char), size_v, in);
129 attr_items[cad_k] = cad_v;
// variables table: same key/value layout as attr_items.
133 for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
135 int size_k = Compression::multibyte_read(in);
136 char cad_k[size_k+1];
137 fread(cad_k, sizeof(char), size_k, in);
140 int size_v = Compression::multibyte_read(in);
141 char cad_v[size_v+1];
142 fread(cad_v, sizeof(char), size_v, in);
145 variables[cad_k] = cad_v;
// macros table: length-prefixed name followed by one integer value.
149 for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
151 int size_k = Compression::multibyte_read(in);
152 char cad_k[size_k+1];
153 fread(cad_k, sizeof(char), size_k, in);
156 macros[cad_k] = Compression::multibyte_read(in);
// lists table: length-prefixed list name, then a count-prefixed sequence of
// length-prefixed members.
161 for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
163 int size_k = Compression::multibyte_read(in);
164 char cad_k[size_k+1];
165 fread(cad_k, sizeof(char), size_k, in);
167 for(int j = 0, limit2 = Compression::multibyte_read(in); j != limit2; j++)
169 int size_v = Compression::multibyte_read(in);
170 char cad_v[size_v+1];
171 fread(cad_v, sizeof(char), size_v, in);
// Each member is stored twice: verbatim in `lists` and lower-cased in
// `listslow`.
173 lists[cad_k].insert(cad_v);
174 listslow[cad_k].insert(tolower(cad_v));
// Opens the file named by `fstfile` and reports an error on stderr if it
// cannot be opened.
// NOTE(review): the failure test, what happens after the message, the actual
// load and the fclose are on lines not visible in this excerpt.
180 TransferMult::readBil(string const &fstfile)
// NOTE(review): opened in text mode ("r"); if the file is binary data, "rb"
// would be needed on platforms that translate line endings — confirm.
182 FILE *in = fopen(fstfile.c_str(), "r");
185 cerr << "Error: Could not open file '" << fstfile << "'." << endl;
// Entry point for loading: opens `datafile` and reports an error on stderr if
// it cannot be opened.
// NOTE(review): the calls that consume the stream and use `fstfile`
// (presumably readData(in) and readBil(fstfile)) are not visible here — confirm.
194 TransferMult::read(string const &datafile, string const &fstfile)
// NOTE(review): text mode ("r") although readData reads with fread and
// Compression::multibyte_read; "rb" may be required on some platforms — confirm.
197 FILE *in = fopen(datafile.c_str(), "r");
200 cerr << "Error: Could not open file '" << datafile << "'." << endl;
// Returns the next token, either replayed from `input_buffer` or freshly read
// from `in`. Every freshly built token is added to `input_buffer`; the
// visible code produces tt_eof, tt_word and tt_blank tokens.
// NOTE(review): the loop and the conditions selecting each branch are on
// lines not visible in this excerpt; the branch comments below are inferred.
210 TransferMult::readToken(FILE *in)
// Tokens still pending in the buffer are served before touching the stream.
212 if(!input_buffer.isEmpty())
214 return input_buffer.next();
220 int val = fgetc_unlocked(in);
// End of input: whatever was accumulated so far is handed back as tt_eof.
223 return input_buffer.add(TransferToken(content, tt_eof));
// Appends the following character straight from the stream (presumably the
// character after an escape — confirm).
// NOTE(review): the fgetc result is cast to char without an EOF check.
228 content += char(fgetc_unlocked(in));
235 int val2 = fgetc_unlocked(in);
239 content += char(fgetc_unlocked(in));
248 content += char(val2);
254 return input_buffer.add(TransferToken(content, tt_word));
258 return input_buffer.add(TransferToken(content, tt_blank));
// Any other character is accumulated into the current token text.
262 content += char(val);
// Main processing loop: reads tokens from `in`, feeds words to the pattern
// matcher, and writes either rule output or word-by-word bilingual
// translations.
// NOTE(review): many structural lines (loop header, braces, case labels, the
// conditions around the two halves) are missing from this excerpt; comments
// describe only the visible statements. The visible writes go to `output`,
// presumably a member assigned from `out` on a line not shown — confirm.
// The identifier `current` had been corrupted to `¤t` in four places (an
// `&curren` entity mis-decoding); `&current` is restored below.
268 TransferMult::transfer(FILE *in, FILE *out)
// Start matching from the matcher's initial state.
273 ms.init(me->getInitial());
// Rewind the token buffer to the last committed position.
283 input_buffer.setPos(last);
// No rule applies: emit the first pending word on its own.
287 if(tmpword.size() != 0)
289 pair<string, int> tr = fstp.biltransWithQueue(*tmpword[0], false);
// Only words with a non-empty bilingual translation are written.
290 if(tr.first.size() != 0)
// Split the translation into its alternatives.
292 vector<string> multiword = acceptions(tr.first);
293 for(unsigned int i = 0, limit = multiword.size(); i != limit; i++)
// Separator between alternatives (the guard is on a line not visible here,
// presumably `i > 0`).
297 fputs_unlocked("[ | ]", output);
// Each alternative is written as ^...$.
299 fputc_unlocked('^', output);
300 fputs_unlocked(multiword[i].c_str(), output);
301 fputc_unlocked('$', output);
// Reposition, commit the new position and restart the matcher.
306 input_buffer.setPos(last);
308 last = input_buffer.getPos();
309 ms.init(me->getInitial());
// No pending word but a pending blank: copy the blank through unchanged.
311 else if(tmpblank.size() != 0)
313 fputs_unlocked(tmpblank[0]->c_str(), output);
315 last = input_buffer.getPos();
316 ms.init(me->getInitial());
// Ask the matcher whether the current state set contains a final state.
320 int val = ms.classifyFinals(me->getFinals());
// Remember how many words the match covers and where it ended; applyRule
// reads `numwords`.
324 numwords = tmpword.size();
325 last = input_buffer.getPos();
// Bind by reference: pointers to the token's content are stored in
// tmpword/tmpblank below, so a copy would leave them dangling.
328 TransferToken &current = readToken(in);
330 switch(current.getType())
// Word token: advance the matcher and remember the word.
333 applyWord(current.getContent());
334 tmpword.push_back(&current.getContent());
// Blank token: remember it.
339 tmpblank.push_back(&current.getContent());
// A further case keeps the content only when words are pending...
343 if(tmpword.size() != 0)
345 tmpblank.push_back(&current.getContent());
// ...and the content is also written straight to the output.
// NOTE(review): the branch structure around this write is not visible.
350 fputs_unlocked(current.getContent().c_str(), output);
356 cerr << "Error: Unknown input token." << endl;
// Reports whether `str` contains the marker " D<".
// string::find returns string::npos (a non-zero value) when the marker is
// absent and 0 when it sits at the very start, so the raw result cannot be
// used as a truth value. acceptions() uses this result in `&&` conditions;
// compare against npos explicitly.
363 TransferMult::isDefaultWord(string const &str)
365 return str.find(" D<") != string::npos;
// Splits `str` on '/' into a vector of alternatives. An alternative for which
// isDefaultWord() holds is moved to index 0, with the previous first element
// appended at the end.
// NOTE(review): the declaration and updates of `low`, the first branch of the
// if/else chain and the return statement are on lines not visible here.
369 TransferMult::acceptions(string const &str)
371 vector<string> result;
374 for(unsigned int i = 0, limit = str.size(); i != limit; i++)
// Second branch of a chain whose first branch is not visible (presumably
// escape handling — confirm).
380 else if(str[i] == '/')
// Text between the previous cut point `low` and this separator.
382 string new_word = str.substr(low, i-low);
// NOTE(review): this guard uses `> 1` while the trailing-word guard below uses
// `> 0`; with exactly one element so far a default word is appended rather
// than promoted. Looks inconsistent — confirm which is intended.
384 if(result.size() > 1 && isDefaultWord(new_word))
// Promote the default word: the old front goes to the back, the new word
// takes index 0.
386 result.push_back(result[0]);
387 result[0] = new_word;
391 result.push_back(new_word);
// Remainder after the last separator.
397 string otherword = str.substr(low);
398 if(result.size() > 0 && isDefaultWord(otherword))
400 result.push_back(result[0]);
401 result[0] = otherword;
405 result.push_back(otherword);
// Recursively writes every combination of alternatives for a sequence of
// words, interleaved with the blanks between them.
//   itwords    - current position in the list of per-word alternatives
//   itblanks   - current position in the list of blanks
//   limitwords - end of the word list
//   acum       - text accumulated so far for the current combination
//   multiple   - true once any alternative other than the first was chosen
// applyRule() calls this with three arguments, so `acum` and `multiple` must
// have defaults in the declaration, which is not visible here.
// NOTE(review): the iterator increments and some conditions are on lines not
// visible in this excerpt.
412 TransferMult::writeMultiple(list<vector<string> >::iterator itwords,
413 list<string>::iterator itblanks,
414 list<vector<string> >::const_iterator limitwords,
415 string acum , bool multiple)
// Base case: all words consumed, so the accumulated combination is written.
417 if(itwords == limitwords)
// Separator written before the combination (guard not visible; presumably
// depends on `multiple` — confirm).
421 fputs_unlocked("[ | ]", output);
423 fputs_unlocked(acum.c_str(), output);
// Alternatives for the current word.
427 vector<string> &refword = *itwords;
// Checked again here; presumably `itwords` was advanced on a line not
// visible, making this the "last word" case (no trailing blank) — confirm.
431 if(itwords == limitwords)
433 for(unsigned int i = 0, limit = refword.size(); i != limit; i++)
// Choosing any alternative other than the first (i > 0) sets `multiple`.
435 writeMultiple(itwords, itblanks, limitwords,
436 acum + "^" + refword[i] + "$", multiple || (i > 0));
// Not the last word: the blank that follows it is appended as well.
441 string &refblank = *itblanks;
444 for(unsigned int i = 0, limit = refword.size(); i != limit; i++)
446 writeMultiple(itwords, itblanks, limitwords,
447 acum + "^" + refword[i] + "$" + refblank,
448 multiple || (i > 0));
// Translates the first `numwords` pending words with the bilingual
// transducer, collects their alternatives and the blanks between them, writes
// all combinations through writeMultiple(), then resets the matcher.
// NOTE(review): the declaration of `blanks` and any statements after the
// matcher reset are on lines not visible here.
455 TransferMult::applyRule()
458 list<vector<string> > words;
// NOTE(review): tmpword[0] is dereferenced with no visible emptiness check;
// presumably callers guarantee at least one pending word — confirm.
460 words.push_back(acceptions(fstp.biltransWithQueue(*tmpword[0], false).first));
// Starts at 1 because word 0 was handled above; blank i-1 sits before word i.
462 for(unsigned int i = 1; i != numwords; i++)
464 blanks.push_back(*tmpblank[i-1]);
465 pair<string, int> tr = fstp.biltransWithQueue(*tmpword[i], false);
466 words.push_back(acceptions(tr.first));
469 writeMultiple(words.begin(), blanks.begin(), words.end());
// Restart matching from the initial state.
471 ms.init(me->getInitial());
// Feeds one word into the match state `ms`: plain characters are stepped
// lower-cased with `any_char` as the second argument, and tags (substrings
// ending in '>') are stepped as alphabet symbols with `any_tag`.
// NOTE(review): the switch/case structure, escape handling and any trailing
// steps are on lines not visible in this excerpt.
479 TransferMult::applyWord(string const &word_str)
// `<` rather than `!=`: presumably `i` is advanced inside the body (escape and
// tag skipping) and may jump past `limit` — confirm.
482 for(unsigned int i = 0, limit = word_str.size(); i < limit; i++)
// NOTE(review): `word_str[i]` is a plain char; for bytes above 0x7F the value
// passed to `::tolower` may be negative.
488 ms.step(::tolower(word_str[i]), any_char);
// Scan forward for the '>' that closes the tag.
492 for(unsigned int j = i+1; j != limit; j++)
494 if(word_str[j] == '>')
// Look up the substring from position i through the closing '>' as an
// alphabet symbol.
496 int symbol = alphabet(word_str.substr(i, j-i+1));
499 ms.step(symbol, any_tag);
512 ms.step(::tolower(word_str[i]), any_char);