Moving more modules
[apertium.git] / trunk / apertium-tools / apertium-utils / morph-indux / trie.cpp
blob2e0d6391316ce10da6640836dcc9c0e81b1a8d26
1 /*
2 * Copyright (C) 2007 Francis Tyers
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
17 * 02111-1307, USA.
20 #include <cwctype>
22 #include "trie.h"
25 * This class is an implementation of a reverse Trie, the
26 * trie for 'function', 'fractal', 'fraction' and 'full' looks
27 * like the following:
29 * root
30 * / \
31 * N L
32 * / / \
33 * O L A
34 * / / \
35 * I U T
36 * | | \
37 * T F C
38 * | \
39 * C A
40 * / \ \
41 * N A R
42 * / \ \
43 * U R F
44 * / \
45 * F F
48 using namespace std;
50 unsigned int lemma = 0;
51 bool debug;
53 TrieNode::TrieNode()
55 // wcout << L"TrieNode()" << endl;
58 TrieNode::~TrieNode()
60 // wcout << L"~TrieNode()" << endl;
63 void
64 TrieNode::add(wstring s)
66 if(!s.empty()) {
67 wchar_t c = s.at(s.length() - 1); // this is stupid but i
68 wstring x = L""; // can't think of a
69 x += c; // better way to do it
70 value = x;
72 if(debug) {
73 wcout << L"TrieNode::add( " << s << L" ) [" << x << L"]" << endl;
76 if(children.count(x) == 0) {
77 if(debug) {
78 wcout << L"* Creating new TrieNode for `" << x << L"'" << endl;
80 children[x] = TrieNode();
83 wstring n = s.substr(0, s.length() - 1);
84 if(debug) {
85 wcout << L" ? Adding: " << n << endl;
87 children[x].add(n);
89 } else {
90 if(debug) {
91 wcout << L"Returning from TrieNode::add()" << endl;
93 return;
98 * returns true if the string was found, false if it wasn't
101 bool
102 TrieNode::find(wstring s)
104 if(s.empty()) {
105 if(debug) {
106 wcout << L"Found." << endl;
108 return true;
109 } else {
110 wchar_t c = s.at(0);
111 wstring x = L""; x += c;
113 if(debug) {
114 wcout << L"TrieNode::find( " << s << L" ) [" << x << L" : " << s.length() << L"]" << endl;
116 if(children.count(x) == 0) {
117 if(debug) {
118 wcout << L"Not found." << endl;
120 } else {
121 children[x].find(s.substr(1));
124 return false;
128 * Return a list of candidate lemmata from a suffix
129 * The 'candidates' argument is ammended and returned.
131 * sfx = suffix
134 vector<wstring>
135 TrieNode::candidates(wstring sfx, vector<wstring> & candidates)
137 if(sfx.empty()) { // reached the end of the suffix.
139 if(debug) {
140 wcout << L"Found." << endl;
143 vector<wstring>::iterator eliter = candidates.begin();
144 eliter = candidates.insert(eliter, L"");
146 if(debug) {
147 wcout << L"TrieNode::candidates() [children: " << children.size() << L"]" << endl;
150 print2(candidates); // call this something less stupid
152 if(debug) {
153 wcout << endl;
156 // we got to the end of the suffix so now we need to count up the
157 // candidates. we also need to reverse them to retrieve the original
158 // stem.
159 for(eliter = candidates.begin(); eliter != candidates.end(); eliter++) {
160 wstring reversed = (*eliter);
161 unsigned int count = 0;
162 // reverse string.
163 for(unsigned int i = (*eliter).length(); i != 0; i--) {
164 reversed[count++] = (*eliter).at(i-1);
167 (*eliter) = reversed;
169 if(debug) {
170 wcout << L" @ " << reversed << endl;
174 return candidates;
176 } else { // we haven't reached the end of the suffix
178 wchar_t c = sfx.at(0);
179 wstring x = L""; x += c;
181 if(debug) {
182 wcout << L"TrieNode::find( " << sfx << L" ) [" << x << L" : " << sfx.length() << L"]" << endl;
185 if(children.count(x) == 0) {
186 // if there are no children to this character then the suffix has
187 // no candidate stems -- and we should return what we currently have
188 if(debug) {
189 wcout << L"Not found." << endl;
191 return candidates;
192 } else {
193 // take the next character in the suffix and recurse down
194 // removing the current character from the suffix.
195 return children[x].candidates(sfx.substr(1), candidates);
200 /* THE MANGLING HAPPENS HERE */
202 void
203 TrieNode::print2(vector<wstring> & candidates)
205 map<wstring,TrieNode>::iterator iter;
206 vector<wstring>::iterator eliter = candidates.end();
208 if(children.empty()) {
209 if(debug) {
210 wcout << L" + candidates.size [e] = " << candidates.size() << endl;
212 lemma++;
214 } else if(children.size() > 1) {
216 if(debug) {
217 wcout << L" + candidates.size [1] = " << candidates.size() << L" : " << children.size() << L" : " << candidates.at(lemma) << endl;
220 for(unsigned int i = 1; i < children.size(); i++) {
221 eliter = candidates.insert(eliter, candidates.at(lemma));
223 } else {
224 wcout << L"(" << children.size() << L") We didn't do shit. Children wasn't empty and size wasn't > 1" << endl;
227 for(iter = children.begin(); iter != children.end(); iter++) {
228 if(debug) {
229 wcout << L" + candidates.size [f] = " << candidates.size() << L" - " << candidates[lemma] << L" += " << (*iter).first << endl;
231 candidates[lemma] += (*iter).first;
232 if(debug) {
233 wcout << L"[" << (*iter).first << L"] (" << children.size() << L") " << endl;
235 children[(*iter).first].print2(candidates);
239 Trie::Trie(bool _verbose)
241 root = TrieNode();
242 debug = _verbose;
245 Trie::Trie()
247 root = TrieNode();
248 debug = false;
251 Trie::~Trie()
253 // wcout << L"~Trie()" << endl;
256 void
257 Trie::add(wstring s)
259 if(debug) {
260 wcout << L"adding: " << s << L" / " << endl;
263 if(s.length() > 0) {
264 root.add(s);
268 bool
269 Trie::find(wstring s)
271 if(debug) {
272 wcout << L"Trie::find( " << s << L" ) " << endl;
274 return root.find(s);
278 * Return a vector of candidate lemmas for a particular suffix
280 vector<wstring>
281 Trie::candidates(wstring sfx)
283 vector<wstring> candidates;
284 wstring reversed = sfx;
285 unsigned int count = 0;
287 // make sure this is clear.
288 candidates.clear();
289 lemma = 0;
291 // we have to reverse the string because the Trie
292 // is in reverse order -- no standard library
293 // function for reversing strings iirc.
294 for(unsigned int i = sfx.length(); i != 0; i--) {
295 reversed[count++] = sfx.at(i-1);
298 if(debug) {
299 wcout << L"sfx: " << sfx << "; rev: " << reversed << endl;
302 return root.candidates(reversed, candidates);
305 void
306 Trie::print(void)