2 * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
19 #include <lttoolbox/FSTProcessor.H>
20 #include <lttoolbox/Compression.H>
21 #include <lttoolbox/MyStdio.H>
// Constructor. Registers the characters [ ] ^ $ / \ @ < > in escaped_chars.
// Other methods in this file (readEscaped, readAnalysis, writeEscaped, ...)
// consult that set when reading or writing backslash-escaped characters.
// Every character is stored as an unsigned short.
27 FSTProcessor::FSTProcessor()
29 // escaped_chars chars
30 escaped_chars.insert(static_cast<unsigned short>('['));
31 escaped_chars.insert(static_cast<unsigned short>(']'));
32 escaped_chars.insert(static_cast<unsigned short>('^'));
33 escaped_chars.insert(static_cast<unsigned short>('$'));
34 escaped_chars.insert(static_cast<unsigned short>('/'));
35 escaped_chars.insert(static_cast<unsigned short>('\\'));
36 escaped_chars.insert(static_cast<unsigned short>('@'));
37 escaped_chars.insert(static_cast<unsigned short>('<'));
38 escaped_chars.insert(static_cast<unsigned short>('>'));
// Destructor.
// NOTE(review): no explicit cleanup statements are visible for it — confirm
// that the members release their own resources.
41 FSTProcessor::~FSTProcessor()
// Reports a malformed input stream by printing a fixed diagnostic to cerr.
// NOTE(review): whether this also aborts processing (exit/throw) is not
// visible here — confirm before relying on it returning to the caller.
46 FSTProcessor::streamError()
48 cerr << "Error: Malformed input stream." << endl;
// Reads one character from `input` and checks that it belongs to
// escaped_chars.
//   input: stream to read from (read with fgetc_unlocked, i.e. no locking).
53 FSTProcessor::readEscaped(FILE *input)
// The int returned by fgetc_unlocked is narrowed to unsigned short, so EOF
// is no longer distinguishable by value; feof() is used below instead.
60 unsigned short val = static_cast<unsigned short>(fgetc_unlocked(input));
// Error case: end of file, or a character that is not a registered escapable
// character. Presumably handled through streamError() — TODO confirm.
62 if(feof(input) || escaped_chars.find(val) == escaped_chars.end())
// Reads a delimited block from `input`, accumulating its text in `result`.
//   input:  stream to read from.
//   delim1: opening delimiter (callers pass '<' or '[').
//   delim2: closing delimiter (callers pass '>' or ']'); reading stops once
//           this character has been read or end of file is reached.
// NOTE(review): how delim1 is used and what exactly is returned are not
// visible here; callers feed the result to alphabet(...) and
// blankqueue.push(...), so it is presumably the block text as a string.
71 FSTProcessor::readFullBlock(FILE *input, char const delim1, char const delim2)
77 while(!feof(input) && c != delim2)
79 c = static_cast<char>(fgetc_unlocked(input));
// Appends a character obtained through readEscaped(); presumably only taken
// after a backslash was read — TODO confirm the guarding condition.
87 result += static_cast<char>(readEscaped(input));
// Returns the next input symbol for analysis mode.
// Symbols still pending in input_buffer are served first; otherwise one
// character is read from `input`, and every symbol produced is also recorded
// in input_buffer.
100 FSTProcessor::readAnalysis(FILE *input)
// Replay buffered symbols before touching the stream.
102 if(!input_buffer.isEmpty())
104 return input_buffer.next();
107 unsigned short val = static_cast<unsigned short>(fgetc_unlocked(input));
// Characters registered in escaped_chars get special treatment below.
// NOTE(review): the per-character dispatch between the following cases is
// not visible here; the cases are inferred from the delimiters passed.
113 if(escaped_chars.find(val) != escaped_chars.end())
// Tag case: the whole <...> block is mapped to a single symbol by alphabet().
118 val = static_cast<unsigned short>(alphabet(readFullBlock(input, '<', '>')));
119 input_buffer.add(val);
// Blank case: the [...] block is queued in blankqueue and the caller sees a
// single space in its place.
123 blankqueue.push(readFullBlock(input, '[', ']'));
124 input_buffer.add(' ');
125 return static_cast<unsigned short>(' ');
// Escape case (presumably after a backslash): the following character is
// read and must itself be in escaped_chars.
128 val = static_cast<unsigned short>(fgetc_unlocked(input));
129 if(escaped_chars.find(val) == escaped_chars.end())
133 input_buffer.add(val);
// Ordinary character: recorded as-is.
141 input_buffer.add(val);
// Returns the next input symbol for postgeneration mode.
// Symbols still pending in input_buffer are served first; otherwise one
// character is read from `input`, and every symbol produced is also recorded
// in input_buffer.
146 FSTProcessor::readPostgeneration(FILE *input)
// Replay buffered symbols before touching the stream.
148 if(!input_buffer.isEmpty())
150 return input_buffer.next();
153 unsigned short val = static_cast<unsigned short>(fgetc_unlocked(input));
// NOTE(review): the dispatch on `val` between the following cases is not
// visible here; the cases are inferred from the delimiters passed.
// Tag case: the whole <...> block is mapped to a single symbol by alphabet().
162 val = static_cast<unsigned short>(alphabet(readFullBlock(input, '<', '>')));
163 input_buffer.add(val);
// Blank case: the [...] block is queued in blankqueue and the caller sees a
// single space in its place.
167 blankqueue.push(readFullBlock(input, '[', ']'));
168 input_buffer.add(' ');
169 return static_cast<unsigned short>(' ');
// Escape case (presumably after a backslash): the following character is
// read and must itself be in escaped_chars.
172 val = static_cast<unsigned short>(fgetc_unlocked(input));
173 if(escaped_chars.find(val) == escaped_chars.end())
177 input_buffer.add(val);
// Ordinary character: recorded as-is.
181 input_buffer.add(val);
// Copies characters from `input` to `output` while looking for `character`.
//   input:     stream to read from.
//   output:    stream that receives the copied characters.
//   character: the character that ends the skip (see the else-if below).
// NOTE(review): the surrounding loop and the end-of-file handling are not
// visible here — confirm termination on EOF.
190 FSTProcessor::skipUntil(FILE *input, FILE *output, int const character)
194 int val = fgetc_unlocked(input);
// A second read followed by writing a backslash plus the character read:
// presumably the handling of an escaped character, re-emitted escaped.
202 val = fgetc_unlocked(input);
207 fputc_unlocked('\\', output);
208 fputc_unlocked(val, output);
// Target character found; presumably the function returns here without
// writing it — TODO confirm.
210 else if(val == character)
// Any other character is copied through unchanged.
216 fputc_unlocked(val, output);
// Returns the next input symbol for generation mode, copying to `output`
// the text that is not part of a ^...$ lexical unit.
//   input:  stream to read from.
//   output: stream that receives pass-through text and [...] blocks.
// NOTE(review): the conditions selecting each branch below are not visible
// here; the descriptions are inferred from the statements in each branch and
// should be confirmed.
222 FSTProcessor::readGeneration(FILE *input, FILE *output)
224 int val = fgetc_unlocked(input);
235 val = fgetc_unlocked(input);
// Pass-through branches: the character is echoed and, where skipUntil() is
// called, everything up to the next '^' is copied before reading on.
243 fputc_unlocked(val,output);
244 val = fgetc_unlocked(input);
249 fputc_unlocked(val,output);
250 skipUntil(input,output, '^');
251 val = fgetc_unlocked(input);
259 fputc_unlocked(val, output);
260 skipUntil(input, output, '^');
261 val = fgetc_unlocked(input);
// Presumably the escape branch: the character after the marker is returned
// verbatim.
272 val = fgetc_unlocked(input);
273 return static_cast<unsigned short>(val);
// The end-of-word marker is reported to the caller as '$'.
278 return static_cast<unsigned short>('$');
// Tag branch: characters up to '>' are collected (in `cad`) and the tag is
// translated into a single symbol by alphabet().
284 while((val = fgetc_unlocked(input)) != '>')
294 return alphabet(cad);
// Blank branch: the [...] block is written straight to `output`, then the
// function recurses to fetch the next real symbol.
298 fputs_unlocked(readFullBlock(input, '[', ']').c_str(), output);
299 return readGeneration(input, output);
// Default: the character itself is the symbol.
303 return static_cast<unsigned short>(val);
// Writes the queued blank blocks in blankqueue to `output`.
// The loop runs once per element that was in the queue on entry and writes
// blankqueue.front() each time.
// NOTE(review): the pop() that advances the queue is not visible here —
// confirm it is present, otherwise the same element is written repeatedly.
310 FSTProcessor::flushBlanks(FILE *output)
312 for(unsigned int i = blankqueue.size(); i > 0; i--)
314 fputs_unlocked(blankqueue.front().c_str(), output);
// Builds the common starting point for all loaded transducers: for every
// entry in `transducers`, a transition labelled (0, 0) is added from `root`
// to that transducer's initial node, and initial_state is then initialised
// from `root`.
320 FSTProcessor::calcInitial()
322 for(map<string, TransExe, Ltstr>::iterator it = transducers.begin(),
323 limit = transducers.end();
326 root.addTransition(0, 0, it->second.getInitial());
329 initial_state.init(&root);
// Tests whether string `a` ends with string `b`.
//   a: string to inspect.
//   b: candidate suffix.
333 FSTProcessor::endsWith(string const &a, string const &b)
// `a` is too short to contain `b`; presumably yields false — TODO confirm.
// This guard also keeps a.size()-b.size() below from wrapping around.
335 if(a.size() < b.size())
// Compare the last b.size() characters of `a` with `b`.
341 return a.substr(a.size()-b.size()) == b;
// Sorts the final states of every loaded transducer into one of three sets
// according to the suffix of the transducer's name:
//   "@inconditional" -> inconditional
//   "@standard"      -> standard
//   "@postblank"     -> postblank
// Any other name is reported on cerr as an unsupported transducer type.
// NOTE(review): getFinals() is called separately for begin() and end(); this
// is only valid if it returns a reference to the same container each time —
// confirm against TransExe.
346 FSTProcessor::classifyFinals()
348 for(map<string, TransExe, Ltstr>::iterator it = transducers.begin(),
349 limit = transducers.end();
352 if(endsWith(it->first, "@inconditional"))
354 inconditional.insert(it->second.getFinals().begin(),
355 it->second.getFinals().end());
357 else if(endsWith(it->first, "@standard"))
359 standard.insert(it->second.getFinals().begin(),
360 it->second.getFinals().end());
362 else if(endsWith(it->first, "@postblank"))
364 postblank.insert(it->second.getFinals().begin(),
365 it->second.getFinals().end());
// Fallback for unrecognised suffixes.
// NOTE(review): whether processing stops after this message is not visible.
369 cerr << "Error: Unsupported transducer type for '";
370 cerr << it->first << "'." << endl;
// Writes `str` to `output`, putting a backslash in front of every character
// that is registered in escaped_chars.
//   str:    text to write.
//   output: destination stream (written with fputc_unlocked).
377 FSTProcessor::writeEscaped(string const &str, FILE *output)
379 for(unsigned int i = 0, limit = str.size(); i < limit; i++)
// str[i] is a plain char and is converted to the set's key type for lookup.
381 if(escaped_chars.find(str[i]) != escaped_chars.end())
383 fputc_unlocked('\\', output);
// The character itself is written (presumably in both cases — TODO confirm).
385 fputc_unlocked(str[i], output);
// Writes one analysed word to `output` as: '^', the escaped surface form,
// the lexical forms exactly as given, '$'.
//   sf:     surface form; written through writeEscaped().
//   lf:     lexical form(s); written raw, without escaping.
//   output: destination stream.
391 FSTProcessor::printWord(string const &sf, string const &lf, FILE *output)
393 fputc_unlocked('^', output);
394 writeEscaped(sf, output);
// Argument order here is (ptr, size = lf.size(), count = sizeof(char)), i.e.
// one item of lf.size() bytes; the bytes written are the same as with the
// more usual (ptr, 1, lf.size()) order.
395 fwrite_unlocked(lf.c_str(), lf.size(), sizeof(char), output);
396 fputc_unlocked('$', output);
// Writes an unknown word to `output` as: '^', escaped surface form, '/',
// '*', escaped surface form again, '$'.
//   sf:     surface form of the word that could not be analysed.
//   output: destination stream.
400 FSTProcessor::printUnknownWord(string const &sf, FILE *output)
402 fputc_unlocked('^', output);
403 writeEscaped(sf, output);
404 fputc_unlocked('/', output);
// '*' marks the lexical form as unknown.
405 fputc_unlocked('*', output);
406 writeEscaped(sf, output);
407 fputc_unlocked('$', output);
// Returns the index of the last character of `str` that is not in
// alphabetic_chars, scanning from the end of the string backwards.
// NOTE(review): the value returned when every character is alphabetic (or
// `str` is empty) is not visible here — confirm.
411 FSTProcessor::lastBlank(string const &str)
// A signed index is used so that the loop can terminate after i == 0.
413 for(int i = static_cast<int>(str.size())-1; i >= 0; i--)
415 if(alphabetic_chars.find(str[i]) == alphabetic_chars.end())
417 return static_cast<unsigned int>(i);
// Emits a blank on `output`.
//   val:    the blank character read from the input.
//   output: destination stream.
// When blankqueue holds pending blank blocks they are presumably written in
// place of `val` (that branch's body is not visible — TODO confirm);
// otherwise `val` itself is written.
425 FSTProcessor::printSpace(unsigned short const val, FILE *output)
427 if(blankqueue.size() > 0)
433 fputc_unlocked(val, output);
// Returns true when `c` is registered in escaped_chars.
438 FSTProcessor::isEscaped(unsigned short const c) const
440 return escaped_chars.find(c) != escaped_chars.end();
// Returns true when `c` is in alphabetic_chars (the set filled by load()).
444 FSTProcessor::isAlphabetic(unsigned short const c) const
446 return alphabetic_chars.find(c) != alphabetic_chars.end();
// Loads the processor's data from `input`, in this order:
//   1. a multibyte-encoded count followed by characters that are inserted
//      into alphabetic_chars;
//   2. the alphabet (alphabet.read);
//   3. a multibyte-encoded count followed by the transducers, each stored as
//      a multibyte-encoded name length, the name bytes, and the transducer
//      body (TransExe::read).
// NOTE(review): the loops driven by `len` and `len2` are not visible here;
// the layout above is inferred from the order of the reads.
450 FSTProcessor::load(FILE *input)
452 int len = Compression::multibyte_read(input);
456 alphabetic_chars.insert(static_cast<unsigned short>(fgetc_unlocked(input)));
460 alphabet.read(input);
462 len = Compression::multibyte_read(input);
466 int len2 = Compression::multibyte_read(input);
470 name += static_cast<char>(fgetc_unlocked(input));
// operator[] creates the entry for `name` if needed; the transducer reads
// itself and is told the current alphabet size.
473 transducers[name].read(input, alphabet.size());
// Prepares the processor for analysis mode: all_finals becomes the union of
// the standard, inconditional and postblank final-state sets.
// NOTE(review): any preparatory calls made before this (e.g. to fill those
// three sets) are not visible here — confirm.
480 FSTProcessor::initAnalysis()
484 all_finals = standard;
485 all_finals.insert(inconditional.begin(), inconditional.end());
486 all_finals.insert(postblank.begin(), postblank.end());
// Prepares the processor for generation mode: the final states of every
// loaded transducer are added to all_finals, regardless of transducer name.
// NOTE(review): getFinals() is called separately for begin() and end(); this
// is only valid if it returns a reference to the same container each time —
// confirm against TransExe.
490 FSTProcessor::initGeneration()
493 for(map<string, TransExe, Ltstr>::iterator it = transducers.begin(),
494 limit = transducers.end();
497 all_finals.insert(it->second.getFinals().begin(),
498 it->second.getFinals().end());
// Prepares the processor for postgeneration mode.
// NOTE(review): the body is not visible here; presumably it reuses one of
// the other init* routines — TODO confirm.
503 FSTProcessor::initPostgeneration()
// Prepares the processor for bilingual-transfer lookups (biltrans*).
// NOTE(review): the body is not visible here; presumably it reuses one of
// the other init* routines — TODO confirm.
509 FSTProcessor::initBiltrans()
// Analysis mode: reads symbols with readAnalysis() until it returns 0,
// advances current_state over them and writes the result to `output`.
//   input:  stream with the text to analyse.
//   output: stream that receives ^surface/analyses$ units and blanks.
// While the automaton is alive the surface form is accumulated in `sf`; each
// time a final state is accepted the analyses are stored in `lf` and the
// buffer position in `last`. When the automaton dies, the text is emitted
// according to what was last accepted and input_buffer is rewound so the
// unread symbols are processed again.
// NOTE(review): isupper()/isspace()/tolower() are given plain char or
// unsigned short values here; for char values outside 0..127 on signed-char
// platforms the argument can be negative, which the C library does not
// define — confirm the expected input encoding.
515 FSTProcessor::analysis(FILE *input, FILE *output)
517 bool last_incond = false;
518 bool last_postblank = false;
519 State current_state = initial_state;
524 while(unsigned short val = readAnalysis(input))
526 // test for final states
527 if(current_state.isFinal(all_finals))
// Inconditional finals are accepted regardless of the following symbol.
529 if(current_state.isFinal(inconditional))
531 bool firstupper = isupper(sf[0]);
532 bool uppercase = firstupper && isupper(sf[sf.size()-1]);
534 lf = current_state.filterFinals(all_finals, alphabet,
536 uppercase, firstupper);
538 last = input_buffer.getPos();
// Postblank finals are accepted too and remembered via last_postblank.
540 else if(current_state.isFinal(postblank))
542 bool firstupper = isupper(sf[0]);
543 bool uppercase = firstupper && isupper(sf[sf.size()-1]);
545 lf = current_state.filterFinals(all_finals, alphabet,
547 uppercase, firstupper);
548 last_postblank = true;
549 last = input_buffer.getPos();
// Other finals are accepted only when the next symbol is not alphabetic.
551 else if(!isAlphabetic(val))
553 bool firstupper = isupper(sf[0]);
554 bool uppercase = firstupper && isupper(sf[sf.size()-1]);
556 lf = current_state.filterFinals(all_finals, alphabet,
558 uppercase, firstupper);
559 last_postblank = false;
561 last = input_buffer.getPos();
// Nothing accumulated yet and the symbol is whitespace: reset the markers.
564 else if(sf == "" && isspace(val))
568 last_postblank = false;
570 last = input_buffer.getPos();
// Advance the automaton; the two-argument step also tries the lowercase
// form (the condition choosing between the two calls is not visible here).
575 current_state.step(val);
579 current_state.step(val, tolower(val));
// Automaton still alive: append the symbol's text to the surface form.
582 if(current_state.size() != 0)
584 alphabet.getSymbol(sf, val);
// Automaton dead with nothing accumulated: a non-alphabetic symbol is
// written straight through (blank, or character with optional backslash).
588 if(!isAlphabetic(val) && sf == "")
592 printSpace(val, output);
598 fputc_unlocked('\\', output);
600 fputc_unlocked(val, output);
// Emit the accepted prefix of sf and rewind the buffer to just before the
// first unconsumed symbol.
605 printWord(sf.substr(0, sf.size()-input_buffer.diffPrevPos(last)),
607 input_buffer.setPos(last);
608 input_buffer.back(1);
// Same as above, followed by a single space.
610 else if(last_postblank)
612 printWord(sf.substr(0, sf.size()-input_buffer.diffPrevPos(last)),
614 fputc_unlocked(' ', output);
615 input_buffer.setPos(last);
616 input_buffer.back(1);
// Unknown word: keep reading alphabetic symbols to complete the word, then
// print it (up to the first space, if any) as unknown.
618 else if(isAlphabetic(val) &&
619 ((sf.size()-input_buffer.diffPrevPos(last)) > lastBlank(sf) ||
624 alphabet.getSymbol(sf, val);
626 while((val = readAnalysis(input)) && isAlphabetic(val));
// string::npos is truncated to unsigned int on both sides of the comparison.
628 unsigned int limit = sf.find(' ');
629 unsigned int size = sf.size();
630 limit = (limit == static_cast<unsigned
631 int>(string::npos)?size:limit);
632 input_buffer.back(1+(size-limit));
633 printUnknownWord(sf.substr(0, limit), output);
637 unsigned int limit = sf.find(' ');
638 unsigned int size = sf.size();
639 limit = (limit == static_cast<unsigned
640 int>(string::npos)?size:limit);
641 input_buffer.back(1+(size-limit));
642 printUnknownWord(sf.substr(0, limit), output);
646 printWord(sf.substr(0, sf.size()-input_buffer.diffPrevPos(last)),
648 input_buffer.setPos(last);
649 input_buffer.back(1);
// Restart the automaton for the next word.
652 current_state = initial_state;
656 last_postblank = false;
660 // print remaining blanks
// Generation mode: copies `input` to `output`, replacing every lexical unit
// with the surface form produced by the transducers.
//   input:  stream with ^...$ lexical units.
//   output: stream that receives the generated text.
//   mode:   selects how units that cannot be generated are written; the
//           values gm_clean and gm_unknown are tested below.
// Text before the first '^' is copied by skipUntil(); symbols are then read
// with readGeneration() until it returns 0xffff (EOF narrowed to unsigned
// short).
// NOTE(review): the condition that detects the end of a unit and the exact
// branch guarding each write are not visible here — confirm.
665 FSTProcessor::generation(FILE *input, FILE *output, GenerationMode mode)
667 State current_state = initial_state;
672 skipUntil(input, output, '^');
674 while((val = readGeneration(input, output)) != 0xffff)
// Units whose accumulated text starts with a marker character are written
// without lookup, with or without the marker depending on the branch.
682 writeEscaped(sf, output);
686 writeEscaped(sf.substr(1), output);
689 else if(sf[0] == '@')
693 writeEscaped(sf, output);
695 else if(mode == gm_clean)
697 writeEscaped(removeTags(sf.substr(1)), output);
699 else if(mode == gm_unknown)
701 writeEscaped(removeTags(sf), output);
// Successful generation: the result of filterFinals() is written without
// its first character.
704 else if(current_state.isFinal(all_finals))
706 bool uppercase = sf.size() > 1 && isupper(sf[1]);
707 bool firstupper= isupper(sf[0]);
709 fputs_unlocked(current_state.filterFinals(all_finals, alphabet,
711 uppercase, firstupper).substr(1).c_str(),
// Generation failed: '#' marks the unit unless the mode asks for clean output.
718 fputc_unlocked('#', output);
719 writeEscaped(sf, output);
721 else if(mode == gm_clean)
723 writeEscaped(removeTags(sf), output);
725 else if(mode == gm_unknown)
727 fputc_unlocked('#', output);
728 writeEscaped(removeTags(sf), output);
// Restart the automaton for the next unit.
732 current_state = initial_state;
735 else if(isspace(val) && sf.size() == 0)
// Units starting with '*' (unknown words) are accumulated without stepping
// the automaton.
739 else if(sf.size() > 0 && sf[0] == '*')
741 alphabet.getSymbol(sf, val);
// Regular symbol: accumulate it and advance the automaton, also trying the
// lowercase form for uppercase single-byte characters.
745 alphabet.getSymbol(sf,val);
746 if(current_state.size() > 0)
748 if(val < 256 && isupper(val))
750 current_state.step(val, tolower(val));
754 current_state.step(val);
// Postgeneration mode: reads symbols with readPostgeneration() until it
// returns 0 and rewrites the sequences recognised by the transducers,
// copying everything else to `output`.
//   input:  stream with generated text.
//   output: stream that receives the post-processed text.
// NOTE(review): the conditions that toggle skip_mode and select each branch
// are not visible here — confirm.
762 FSTProcessor::postgeneration(FILE *input, FILE *output)
764 bool skip_mode = true;
765 State current_state = initial_state;
770 while(unsigned short val = readPostgeneration(input))
// Pass-through of a symbol: blank, or character with optional backslash.
781 printSpace(val, output);
787 fputc_unlocked('\\', output);
789 fputc_unlocked(val, output);
794 // test for final states
795 if(current_state.isFinal(all_finals))
// NOTE(review): sf[1] is read before the sf.size() > 1 check and sf[2] is
// read when only size > 1 is known; confirm sf always holds enough
// characters at this point.
797 bool firstupper = isupper(sf[1]);
798 bool uppercase = sf.size() > 1 && firstupper && isupper(sf[2]);
799 lf = current_state.filterFinals(all_finals, alphabet,
801 uppercase, firstupper, 0);
803 // case of the beginning of the next word
// NOTE(review): `i` is unsigned, so `i >= 0` is always true; the loop can
// only end through a statement in its body (not visible here). If index 0 is
// passed, i wraps around and sf[i] is out of range — confirm.
806 for(unsigned int i = sf.size()-1; i >= 0; i--)
814 mybuf = sf[i] + mybuf;
820 bool myfirstupper = isupper(mybuf[0]);
821 bool myuppercase = mybuf.size() > 1 && isupper(mybuf[1]);
// NOTE(review): same always-true `i >= 0` condition on an unsigned index.
823 for(unsigned int i = lf.size()-1; i >= 0; i--)
827 if(myfirstupper && i != lf.size()-1)
829 lf[i+1] = toupper(lf[i+1]);
833 lf[i+1] = tolower(lf[i+1]);
841 lf[i] = toupper(lf[i]);
845 lf[i] = tolower(lf[i]);
851 last = input_buffer.getPos();
// Advance the automaton; the two-argument step also tries the lowercase form.
856 current_state.step(val);
860 current_state.step(val, tolower(val));
// Automaton still alive: append the symbol's text to sf.
863 if(current_state.size() != 0)
865 alphabet.getSymbol(sf, val);
// No match: write sf (minus its first character) up to `mark` and rewind the
// buffer so the remainder is read again.
871 unsigned int mark = sf.size();
872 for(unsigned int i = 1, limit = sf.size(); i < limit; i++)
880 fputs_unlocked(sf.substr(1, mark-1).c_str(), output);
881 if(mark == sf.size())
883 input_buffer.back(1);
887 input_buffer.back(sf.size()-mark);
// Match: write lf without its first character and last two characters, then
// handle the second-to-last character of lf as the next symbol.
892 fputs_unlocked(lf.substr(1,lf.size()-3).c_str(), output);
893 input_buffer.setPos(last);
894 input_buffer.back(2);
895 val = lf[lf.size()-2];
898 printSpace(val, output);
904 fputc_unlocked('\\', output);
906 fputc_unlocked(val, output);
// Restart the automaton.
910 current_state = initial_state;
918 // print remaining blanks
// Looks up `input_word` in the bilingual transducers and returns the
// translation as a string.
//   input_word: lexical unit to translate.
//   with_delim: when true (the default setup below) the first and last
//               characters of input_word are treated as delimiters and
//               skipped; when false the scan ends at the last character.
// Tags that can no longer be matched once the automaton has died are
// collected in `queue` and appended to each translation.
// NOTE(review): input_word.size()-2 and size()-1 are computed in unsigned
// arithmetic and wrap around for very short inputs; no length check is
// visible here — confirm callers never pass such strings.
923 FSTProcessor::biltrans(string const &input_word, bool with_delim)
925 State current_state = initial_state;
927 unsigned int start_point = 1;
928 unsigned int end_point = input_word.size()-2;
931 if(with_delim == false)
934 end_point = input_word.size()-1;
// Words marked with '*' (unknown) are handled specially; presumably they are
// returned unchanged — TODO confirm.
937 if(input_word[start_point] == '*')
942 bool firstupper = isupper(input_word[start_point]);
943 bool uppercase = firstupper && isupper(input_word[start_point+1]);
945 for(unsigned int i = start_point; i <= end_point; i++)
// Backslash: the escaped character is taken literally (the index advance is
// not visible here).
950 if(input_word[i] == '\\')
953 val = static_cast<unsigned short>((unsigned char) input_word[i]);
// Tag: collect the text up to and including '>' and map it to one symbol.
955 else if(input_word[i] == '<')
958 for(unsigned int j = i + 1; j <= end_point; j++)
960 symbol += input_word[j];
961 if(input_word[j] == '>')
967 val = alphabet(symbol);
// Ordinary byte, widened without sign extension.
971 val = static_cast<unsigned short>((unsigned char) input_word[i]);
973 if(current_state.size() != 0)
975 if(val < 256 && isupper(val))
977 current_state.step(val, tolower(val));
981 current_state.step(val);
// Remember the most recent accepted translation.
984 if(current_state.isFinal(all_finals))
986 result = current_state.filterFinals(all_finals, alphabet,
988 uppercase, firstupper, 0);
995 result = result.substr(1);
// Automaton dead: unmatched tags go to the queue.
999 if(current_state.size() == 0)
1003 queue.append(symbol);
1007 // word is not present
// The '@' marker is inserted after the leading delimiter, or prefixed when
// there is no delimiter.
1010 result = "^@" + input_word.substr(1);
1014 result = "@" + input_word;
1021 // attach unmatched queue automatically
1025 string result_with_queue = "";
1026 bool multiple_translation = false;
1027 for(unsigned int i = 0, limit = result.size(); i != limit; i++)
1032 result_with_queue += '\\';
1037 result_with_queue.append(queue);
1038 multiple_translation = true;
1044 result_with_queue += result[i];
1046 result_with_queue.append(queue);
1050 result_with_queue += '$';
1051 // if(multiple_translation)
1053 // result_with_queue[0] = '@';
1054 // result = "^" + result_with_queue;
1059 return result_with_queue;
1062 // if(multiple_translation)
1064 // return "@"+result_with_queue;
1068 return result_with_queue;
// Looks up `input_word` in the bilingual transducers and returns a pair:
// the translation and the length of the queue of unmatched tags that was
// appended to it (0 when nothing was appended or the word was not found).
//   input_word: lexical unit to translate.
//   with_delim: when true (the default setup below) the first and last
//               characters of input_word are treated as delimiters and
//               skipped; when false the scan ends at the last character.
// NOTE(review): input_word.size()-2 and size()-1 are computed in unsigned
// arithmetic and wrap around for very short inputs; no length check is
// visible here — confirm callers never pass such strings.
1082 FSTProcessor::biltransWithQueue(string const &input_word, bool with_delim)
1084 State current_state = initial_state;
1086 unsigned int start_point = 1;
1087 unsigned int end_point = input_word.size()-2;
1090 if(with_delim == false)
1093 end_point = input_word.size()-1;
// Words marked with '*' (unknown) are returned unchanged with queue length 0.
1096 if(input_word[start_point] == '*')
1098 return pair<string, int>(input_word, 0);
1101 bool firstupper = isupper(input_word[start_point]);
1102 bool uppercase = firstupper && isupper(input_word[start_point+1]);
1104 for(unsigned int i = start_point; i <= end_point; i++)
// Backslash: the escaped character is taken literally (the index advance is
// not visible here).
1109 if(input_word[i] == '\\')
1112 val = static_cast<unsigned short>((unsigned char) input_word[i]);
// Tag: collect the text up to and including '>' and map it to one symbol.
1114 else if(input_word[i] == '<')
1117 for(unsigned int j = i + 1; j <= end_point; j++)
1119 symbol += input_word[j];
1120 if(input_word[j] == '>')
1126 val = alphabet(symbol);
// Ordinary byte, widened without sign extension.
1130 val = static_cast<unsigned short>((unsigned char) input_word[i]);
1132 if(current_state.size() != 0)
1134 if(val < 256 && isupper(val))
1136 current_state.step(val, tolower(val));
1140 current_state.step(val);
// Remember the most recent accepted translation.
1143 if(current_state.isFinal(all_finals))
1145 result = current_state.filterFinals(all_finals, alphabet,
1147 uppercase, firstupper, 0);
1154 result = result.substr(1);
// Automaton dead: unmatched tags go to the queue.
1158 if(current_state.size() == 0)
1162 queue.append(symbol);
1166 // word is not present
// The '@' marker is inserted after the leading delimiter, or prefixed when
// there is no delimiter.
1169 result = "^@" + input_word.substr(1);
1173 result = "@" + input_word;
1175 return pair<string, int>(result, 0);
1180 // attach unmatched queue automatically
1184 string result_with_queue = "";
// NOTE(review): multiple_translation is assigned below but no read of it is
// visible in this function.
1185 bool multiple_translation = false;
1186 for(unsigned int i = 0, limit = result.size(); i != limit; i++)
1191 result_with_queue += '\\';
1196 result_with_queue.append(queue);
1197 multiple_translation = true;
1203 result_with_queue += result[i];
1205 result_with_queue.append(queue);
1209 result_with_queue += '$';
1211 return pair<string, int>(result_with_queue, queue.size());
1219 return pair<string, int>(result, 0);
// Looks up `input_word` in the bilingual transducers; unlike biltrans() and
// biltransWithQueue(), no queue of unmatched tags is visible in this
// variant.
//   input_word: lexical unit to translate.
//   with_delim: when true (the default setup below) the first and last
//               characters of input_word are treated as delimiters and
//               skipped; when false the scan ends at the last character.
// NOTE(review): input_word.size()-2 and size()-1 are computed in unsigned
// arithmetic and wrap around for very short inputs; no length check is
// visible here — confirm callers never pass such strings.
1224 FSTProcessor::biltransWithoutQueue(string const &input_word, bool with_delim)
1226 State current_state = initial_state;
1228 unsigned int start_point = 1;
1229 unsigned int end_point = input_word.size()-2;
1231 if(with_delim == false)
1234 end_point = input_word.size()-1;
// Words marked with '*' (unknown) are handled specially; presumably they are
// returned unchanged — TODO confirm.
1237 if(input_word[start_point] == '*')
1242 bool firstupper = isupper(input_word[start_point]);
1243 bool uppercase = firstupper && isupper(input_word[start_point+1]);
1245 for(unsigned int i = start_point; i <= end_point; i++)
// Backslash: the escaped character is taken literally (the index advance is
// not visible here).
1250 if(input_word[i] == '\\')
1253 val = static_cast<unsigned short>((unsigned char) input_word[i]);
// Tag: collect the text up to and including '>' and map it to one symbol.
1255 else if(input_word[i] == '<')
1258 for(unsigned int j = i + 1; j <= end_point; j++)
1260 symbol += input_word[j];
1261 if(input_word[j] == '>')
1267 val = alphabet(symbol);
// Ordinary byte, widened without sign extension.
1271 val = static_cast<unsigned short>((unsigned char) input_word[i]);
1273 if(current_state.size() != 0)
1275 if(val < 256 && isupper(val))
1277 current_state.step(val, tolower(val));
1281 current_state.step(val);
// Remember the most recent accepted translation.
1284 if(current_state.isFinal(all_finals))
1286 result = current_state.filterFinals(all_finals, alphabet,
1288 uppercase, firstupper, 0);
1295 result = result.substr(1);
1299 if(current_state.size() == 0)
1303 // word is not present
// The '@' marker is inserted after the leading delimiter, or prefixed when
// there is no delimiter.
1306 result = "^@" + input_word.substr(1);
1310 result = "@" + input_word;
// Returns true when the initial state is not itself a final state in
// all_finals, i.e. the loaded transducers do not accept the empty input.
1326 FSTProcessor::valid() const
1328 return !initial_state.isFinal(all_finals);
// Returns the next input symbol for SAO mode.
// Symbols still pending in input_buffer are served first; otherwise one
// character is read from `input`, and every symbol produced is also recorded
// in input_buffer.
1332 FSTProcessor::readSAO(FILE *input)
// Replay buffered symbols before touching the stream.
1334 if(!input_buffer.isEmpty())
1336 return input_buffer.next();
1339 unsigned short val = static_cast<unsigned short>(fgetc_unlocked(input));
1345 if(escaped_chars.find(val) != escaped_chars.end())
// A <...> block is read whole. A block opening a CDATA section is extended
// with further blocks until the text ends in "]]>", so '>' characters inside
// the section do not terminate it early.
1349 string str = readFullBlock(input, '<', '>');
1350 if(str.substr(0, 9) == "<![CDATA[")
1352 while(str.substr(str.size()-3) != "]]>")
// substr(1) drops the first character of each continuation block.
1354 str.append(readFullBlock(input, '<', '>').substr(1));
// The block is queued as a blank and the caller sees a single space.
// NOTE(review): whether this applies to every block or only to CDATA blocks
// is not visible here — confirm.
1356 blankqueue.push(str);
1357 input_buffer.add(' ');
1358 return static_cast<unsigned short>(' ');
// Backslash: the following character is read and recorded.
1365 else if (val == '\\') {
1366 val = static_cast<unsigned short>(fgetc_unlocked(input));
1369 input_buffer.add(val);
// Ordinary character: recorded as-is.
1381 input_buffer.add(val);
// Writes the lexical form `lf` to `output` for SAO mode, starting at index 1
// (the first character of lf is skipped).
// NOTE(review): the loop body may stop early or filter characters; only the
// write of lf[i] is visible here — confirm.
// NOTE(review): the loop condition is `i != limit` with i starting at 1, so
// an empty `lf` (limit == 0) would not terminate normally — confirm lf is
// never empty.
1386 FSTProcessor::printSAOWord(string const &lf, FILE *output)
1388 for(unsigned int i = 1, limit = lf.size(); i != limit; i++)
1394 fputc_unlocked(lf[i], output);
// SAO mode: same overall loop as analysis(), but symbols come from
// readSAO(), analyses are obtained with filterFinalsSAO() and printed with
// printSAOWord(), and unknown words are wrapped in <d>...</d>.
//   input:  stream with the text to process.
//   output: stream that receives the result.
// NOTE(review): this function replaces the contents of the member
// escaped_chars with just backslash, '<' and '>' and does not restore it;
// later calls on the same object see the reduced set — confirm intended.
// NOTE(review): isupper()/isspace()/tolower() are given plain char or
// unsigned short values; for char values outside 0..127 on signed-char
// platforms the argument can be negative — confirm the input encoding.
1399 FSTProcessor::SAO(FILE *input, FILE *output)
1401 bool last_incond = false;
1402 bool last_postblank = false;
1403 State current_state = initial_state;
1408 escaped_chars.clear();
1409 escaped_chars.insert(static_cast<unsigned short>('\\'));
1410 escaped_chars.insert(static_cast<unsigned short>('<'));
1411 escaped_chars.insert(static_cast<unsigned short>('>'));
// Main loop: ends when readSAO() returns 0.
1413 while(unsigned short val = readSAO(input))
1415 // test for final states
1416 if(current_state.isFinal(all_finals))
// Inconditional finals are accepted regardless of the following symbol.
1418 if(current_state.isFinal(inconditional))
1420 bool firstupper = isupper(sf[0]);
1421 bool uppercase = firstupper && isupper(sf[sf.size()-1]);
1423 lf = current_state.filterFinalsSAO(all_finals, alphabet,
1425 uppercase, firstupper);
1427 last = input_buffer.getPos();
// Postblank finals are accepted too and remembered via last_postblank.
1429 else if(current_state.isFinal(postblank))
1431 bool firstupper = isupper(sf[0]);
1432 bool uppercase = firstupper && isupper(sf[sf.size()-1]);
1434 lf = current_state.filterFinalsSAO(all_finals, alphabet,
1436 uppercase, firstupper);
1437 last_postblank = true;
1438 last = input_buffer.getPos();
// Other finals are accepted only when the next symbol is not alphabetic.
1440 else if(!isAlphabetic(val))
1442 bool firstupper = isupper(sf[0]);
1443 bool uppercase = firstupper && isupper(sf[sf.size()-1]);
1445 lf = current_state.filterFinalsSAO(all_finals, alphabet,
1447 uppercase, firstupper);
1448 last_postblank = false;
1449 last_incond = false;
1450 last = input_buffer.getPos();
1453 else if(sf == "" && isspace(val)) // "hay incidencias" problem
1457 last_postblank = false;
1458 last_incond = false;
1459 last = input_buffer.getPos();
// Advance the automaton; the two-argument step also tries the lowercase form.
1464 current_state.step(val);
1468 current_state.step(val, tolower(val));
// Automaton still alive: append the symbol's text to the surface form.
1471 if(current_state.size() != 0)
1473 alphabet.getSymbol(sf, val);
// Automaton dead with nothing accumulated: a non-alphabetic symbol is
// written straight through.
1477 if(!isAlphabetic(val) && sf == "")
1481 printSpace(val, output);
1487 fputc_unlocked('\\', output);
1489 fputc_unlocked(val, output);
// Emit the last accepted analysis and rewind the buffer to just before the
// first unconsumed symbol.
1492 else if(last_incond)
1494 printSAOWord(lf, output);
1495 input_buffer.setPos(last);
1496 input_buffer.back(1);
1498 else if(last_postblank)
1500 printSAOWord(lf, output);
1501 fputc_unlocked(' ', output);
1502 input_buffer.setPos(last);
1503 input_buffer.back(1);
// Unknown word: keep reading alphabetic symbols to complete the word, then
// write it (up to the first space, if any) between <d> and </d>.
1505 else if(isAlphabetic(val) &&
1506 ((sf.size()-input_buffer.diffPrevPos(last)) > lastBlank(sf) ||
1511 alphabet.getSymbol(sf, val);
1513 while((val = readSAO(input)) && isAlphabetic(val));
// string::npos is truncated to unsigned int on both sides of the comparison.
1515 unsigned int limit = sf.find(' ');
1516 unsigned int size = sf.size();
1517 limit = (limit == static_cast<unsigned
1518 int>(string::npos)?size:limit);
1519 input_buffer.back(1+(size-limit));
// fwrite arguments are (ptr, size, count): one item of the given byte length.
1520 fwrite_unlocked("<d>", 3, sizeof(char), output);
1521 fwrite_unlocked(sf.c_str(), limit, sizeof(char), output);
1522 fwrite_unlocked("</d>", 4, sizeof(char), output);
1527 unsigned int limit = sf.find(' ');
1528 unsigned int size = sf.size();
1529 limit = (limit == static_cast<unsigned
1530 int>(string::npos)?size:limit);
1531 input_buffer.back(1+(size-limit));
1532 fwrite_unlocked("<d>", 3, sizeof(char), output);
1533 fwrite_unlocked(sf.c_str(), limit, sizeof(char), output);
1534 fwrite_unlocked("</d>", 4, sizeof(char), output);
1538 printSAOWord(lf, output);
1539 input_buffer.setPos(last);
1540 input_buffer.back(1);
// Restart the automaton and clear the acceptance markers for the next word.
1543 current_state = initial_state;
1546 last_incond = false;
1547 last_postblank = false;
1551 // print remaining blanks
1552 flushBlanks(output);
// Returns the part of `str` that precedes its first tag, where a tag starts
// at a '<' that is not at index 0 and is not preceded by a backslash.
// NOTE(review): the value returned when no such '<' exists is not visible
// here; presumably `str` unchanged — TODO confirm.
1556 FSTProcessor::removeTags(string const &str)
1558 for(unsigned int i = 0; i < str.size(); i++)
// The i >= 1 test means a '<' in the very first position is never treated as
// a tag start; it also guards the str[i-1] access.
1560 if(str[i] == '<' && i >=1 && str[i-1] != '\\')
1562 return str.substr(0, i);