CMiniLexicon::FindMajorSignatures(): use log file routines
[linguistica.git] / Stem.cpp
blobcca37279faf74bf9eeb62d02cb18958e5a58f1dd
1 // Implementation of CStem methods
2 // Copyright © 2009 The University of Chicago
3 #include "Stem.h"
5 // See also Stem_Phonology.cpp for phonology methods,
6 // Stem_EncodingLength.cpp for description length calculations,
7 // GUIclasses.cpp for methods pertaining to GUI output,
8 // and Word.cpp for methods concerning stems-qua-words.
10 #include <Q3TextStream>
11 #include <QList>
12 #include "EarleyParser.h"
13 #include "Signature.h"
14 #include "Prefix.h"
15 #include "Suffix.h"
17 CLexicon* CStem::m_Lexicon; // assign value ! :TODO
19 // construction/destruction.
21 CStem::CStem(CMiniLexicon* mini)
22 : CLParse(mini),
23 m_WordCount(0),
24 m_BrokenForm(),
25 m_SuffixList(), // initialized below
26 m_pSuffixSignature(NULL), m_pPrefixSignature(NULL),
27 m_PrefixList(), // initialized below
28 m_Regular(0),
29 m_SimpleFlag(false),
30 m_StemType(UNKNOWN),
31 m_StemLoc(0),
32 m_Stem2Loc(0),
33 m_NumberOfStems(0),
34 m_PrefixLoc(0), m_SuffixLoc(0),
35 m_Confidence(QString()),
36 m_pStem(NULL),
37 m_strStem(),
38 m_strSuffix(), m_strPrefix(),
39 m_pSuffix(NULL), m_pPrefix(NULL),
40 m_LengthOfPointerToMe(0.0),
41 m_WordPtrList(new QList<CStem*>),
42 m_LeftNeighbors(),
43 m_RightNeighbors(),
44 // compounding.
45 m_MyEarleyParser(),
46 m_CompoundCount(0.0),
47 m_Affixness(0.0),
48 // phonology.
49 m_Phonology_Tier1(),
50 m_Phonology_Tier2(),
51 m_Phonology_Tier1_Skeleton(),
52 m_UnigramLogProb(0.0),
53 m_BigramLogProb(0.0),
54 m_BigramComplexity(0.0), // average
55 m_UnigramComplexity(0.0), // average
56 m_PhonologicalContent(0.0),
57 m_HMM_LogProbability(0.0),
58 // first Boltzmann model.
59 m_Tier2_LocalMI_Score(0.0),
60 m_LocalMI_TotalBoltzmannScore(0.0),
61 m_LocalMI_Plog(0.0),
62 // second Boltzmann model.
63 m_Tier2_DistantMI_Score(0.0),
64 m_DistantMI_TotalBoltzmannScore(0.0),
65 m_DistantMI_Plog(0.0),
66 // tier-1 phonology info for graphical display.
67 m_phonologies(), m_unigrams(), m_mis(),
68 m_countofunigrams(0),
69 m_countofmis(0),
70 m_maxpositive(0.0),
71 m_maxnegative(0.0),
72 m_donephonology(false)
74 m_SuffixList.Alphabetize();
75 m_PrefixList.Alphabetize();
78 CStem::CStem(const CStringSurrogate& stem, CMiniLexicon* mini)
79 : CLParse(stem, mini),
80 m_WordCount (0),
81 m_BrokenForm (),
82 m_SuffixList (), // initialized below
83 m_pSuffixSignature (NULL),
84 m_pPrefixSignature (NULL),
85 m_PrefixList (), // initialized below
86 m_Regular (0),
87 m_SimpleFlag (false),
88 m_StemType (UNKNOWN),
89 m_StemLoc( 0),
90 m_Stem2Loc (0),
91 m_NumberOfStems (0),
92 m_PrefixLoc (0),
93 m_SuffixLoc (0),
94 m_Confidence (QString()),
95 m_pStem (NULL),
96 m_strStem (),
97 m_strSuffix (),
98 m_strPrefix (),
99 m_pSuffix (NULL),
100 m_pPrefix (NULL),
101 m_LengthOfPointerToMe(0.0),
102 m_WordPtrList(new QList<CStem*>),
103 m_LeftNeighbors (),
104 m_RightNeighbors (),
105 // compounding.
106 m_MyEarleyParser (),
107 m_CompoundCount (0.0),
108 m_Affixness (0.0),
109 // phonology.
110 m_Phonology_Tier1 (),
111 m_Phonology_Tier2 (),
112 m_Phonology_Tier1_Skeleton(),
113 m_UnigramLogProb (0.0),
114 m_BigramLogProb (0.0),
115 m_BigramComplexity (0.0), // average
116 m_UnigramComplexity(0.0), // average
117 m_PhonologicalContent(0.0),
118 m_HMM_LogProbability(0.0),
119 // first Boltzmann model.
120 m_Tier2_LocalMI_Score(0.0),
121 m_LocalMI_TotalBoltzmannScore(0.0),
122 m_LocalMI_Plog(0.0),
123 // second Boltzmann model.
124 m_Tier2_DistantMI_Score(0.0),
125 m_DistantMI_TotalBoltzmannScore(0.0),
126 m_DistantMI_Plog(0.0),
127 // tier-1 phonology info for graphical display.
128 m_phonologies(), m_unigrams(), m_mis(),
129 m_countofunigrams(0),
130 m_countofmis(0),
131 m_maxpositive(0.0),
132 m_maxnegative(0.0),
133 m_donephonology(false)
135 m_SuffixList.Alphabetize();
136 m_PrefixList.Alphabetize();
139 CStem::CStem(const CStem& x)
140 : CLParse(x),
141 m_WordCount(x.m_WordCount),
142 m_BrokenForm(), // XXX. copy?
143 m_SuffixList(), // initialized below. XXX. copy?
144 m_pSuffixSignature (x.m_pSuffixSignature),
145 m_pPrefixSignature (x.m_pPrefixSignature),
146 m_PrefixList (), // initialized below. XXX. copy?
147 m_Regular (x.m_Regular),
148 m_SimpleFlag (x.m_SimpleFlag),
149 m_StemType (x.m_StemType),
150 m_StemLoc (x.m_StemLoc),
151 m_Stem2Loc (x.m_Stem2Loc),
152 m_NumberOfStems (x.m_NumberOfStems),
153 m_PrefixLoc (x.m_PrefixLoc),
154 m_SuffixLoc (x.m_SuffixLoc),
155 m_Confidence (x.m_Confidence),
156 m_pStem (x.m_pStem),
157 m_strStem (x.m_strStem),
158 m_strSuffix(),
159 m_strPrefix(), // XXX. copy?
160 m_pSuffix(x.m_pSuffix),
161 m_pPrefix(x.m_pPrefix),
162 m_LengthOfPointerToMe(x.m_LengthOfPointerToMe),
163 m_WordPtrList(new QList<CStem*>(*x.m_WordPtrList)),
164 m_LeftNeighbors(), m_RightNeighbors(), // XXX. copy?
165 // compounding.
166 m_MyEarleyParser(), // XXX. copy?
167 m_CompoundCount(x.m_CompoundCount),
168 m_Affixness(x.m_Affixness),
169 // phonology.
170 m_Phonology_Tier1(x.m_Phonology_Tier1),
171 m_Phonology_Tier2(x.m_Phonology_Tier2),
172 m_Phonology_Tier1_Skeleton(x.m_Phonology_Tier1_Skeleton),
173 m_UnigramLogProb(x.m_UnigramLogProb),
174 m_BigramLogProb(x.m_BigramLogProb),
175 m_BigramComplexity(x.m_BigramComplexity),
176 m_UnigramComplexity(x.m_UnigramComplexity),
177 m_PhonologicalContent(x.m_PhonologicalContent),
178 m_HMM_LogProbability(0.0), // XXX. copy?
179 // first Boltzmann model.
180 m_Tier2_LocalMI_Score(x.m_Tier2_LocalMI_Score),
181 m_LocalMI_TotalBoltzmannScore(0.0), // XXX. copy?
182 m_LocalMI_Plog(0.0), // XXX. copy?
183 // second Boltzmann model.
184 m_Tier2_DistantMI_Score(x.m_Tier2_DistantMI_Score),
185 m_DistantMI_TotalBoltzmannScore(0.0), // XXX. copy?
186 m_DistantMI_Plog(0.0), // XXX. copy?
187 // tier-1 phonology info for graphical display.
188 m_phonologies(), m_unigrams(), m_mis(), // XXX. copy?
189 m_countofunigrams(0), // XXX. copy?
190 m_countofmis(0), // XXX. copy?
191 m_maxpositive(0.0), // XXX. copy?
192 m_maxnegative(0.0), // XXX. copy?
193 m_donephonology(x.m_donephonology)
195 m_SuffixList.Alphabetize();
196 m_PrefixList.Alphabetize();
199 CStem::CStem(const CLParse& text_in_corpus)
200 : CLParse(text_in_corpus),
201 m_WordCount(0),
202 m_BrokenForm(),
203 m_SuffixList(), // initialized below
204 m_pSuffixSignature(NULL), m_pPrefixSignature(NULL),
205 m_PrefixList(), // initialized below
206 m_Regular(0),
207 m_SimpleFlag(false),
208 m_StemType(UNKNOWN),
209 m_StemLoc(0),
210 m_Stem2Loc(0),
211 m_NumberOfStems(0),
212 m_PrefixLoc(0), m_SuffixLoc(0),
213 m_Confidence(QString()),
214 m_pStem(NULL),
215 m_strStem(),
216 m_strSuffix(), m_strPrefix(),
217 m_pSuffix(NULL), m_pPrefix(NULL),
218 m_LengthOfPointerToMe(0.0),
219 m_WordPtrList(new QList<CStem*>),
220 m_LeftNeighbors(),
221 m_RightNeighbors(),
222 // compounding.
223 m_MyEarleyParser(),
224 m_CompoundCount(0.0),
225 m_Affixness(0.0),
226 // phonology.
227 m_Phonology_Tier1(),
228 m_Phonology_Tier2(),
229 m_Phonology_Tier1_Skeleton(),
230 m_UnigramLogProb(0.0),
231 m_BigramLogProb(0.0),
232 m_BigramComplexity(0.0), // average
233 m_UnigramComplexity(0.0), // average
234 m_PhonologicalContent(0.0),
235 m_HMM_LogProbability(0.0),
236 // first Boltzmann model.
237 m_Tier2_LocalMI_Score(0.0),
238 m_LocalMI_TotalBoltzmannScore(0.0),
239 m_LocalMI_Plog(0.0),
240 // second Boltzmann model.
241 m_Tier2_DistantMI_Score(0.0),
242 m_DistantMI_TotalBoltzmannScore(0.0),
243 m_DistantMI_Plog(0.0),
244 // tier-1 phonology info for graphical display.
245 m_phonologies(), m_unigrams(), m_mis(),
246 m_countofunigrams(0),
247 m_countofmis(0),
248 m_maxpositive(0.0),
249 m_maxnegative(0.0),
250 m_donephonology(false)
252 m_SuffixList.Alphabetize();
253 m_PrefixList.Alphabetize();
256 CStem::CStem(const CParse& text, CMiniLexicon* lex)
257 : CLParse(text, lex),
258 m_WordCount(0),
259 m_BrokenForm(),
260 m_SuffixList(), // initialized below
261 m_pSuffixSignature(NULL), m_pPrefixSignature(NULL),
262 m_PrefixList(), // initialized below
263 m_Regular(0),
264 m_SimpleFlag(false),
265 m_StemType(UNKNOWN),
266 m_StemLoc(0),
267 m_Stem2Loc(0),
268 m_NumberOfStems(0),
269 m_PrefixLoc(0), m_SuffixLoc(0),
270 m_Confidence(QString()),
271 m_pStem(NULL),
272 m_strStem(),
273 m_strSuffix(), m_strPrefix(),
274 m_pSuffix(NULL), m_pPrefix(NULL),
275 m_LengthOfPointerToMe(0.0),
276 m_WordPtrList(new QList<CStem*>),
277 m_LeftNeighbors(),
278 m_RightNeighbors(),
279 // compounding.
280 m_MyEarleyParser(),
281 m_CompoundCount(0.0),
282 m_Affixness(0.0),
283 // phonology.
284 m_Phonology_Tier1(),
285 m_Phonology_Tier2(),
286 m_Phonology_Tier1_Skeleton(),
287 m_UnigramLogProb(0.0),
288 m_BigramLogProb(0.0),
289 m_BigramComplexity(0.0), // average
290 m_UnigramComplexity(0.0), // average
291 m_PhonologicalContent(0.0),
292 m_HMM_LogProbability(0.0),
293 // first Boltzmann model.
294 m_Tier2_LocalMI_Score(0.0),
295 m_LocalMI_TotalBoltzmannScore(0.0),
296 m_LocalMI_Plog(0.0),
297 // second Boltzmann model.
298 m_Tier2_DistantMI_Score(0.0),
299 m_DistantMI_TotalBoltzmannScore(0.0),
300 m_DistantMI_Plog(0.0),
301 // tier-1 phonology info for graphical display.
302 m_phonologies(), m_unigrams(), m_mis(),
303 m_countofunigrams(0),
304 m_countofmis(0),
305 m_maxpositive(0.0),
306 m_maxnegative(0.0),
307 m_donephonology(false)
309 m_SuffixList.Alphabetize();
310 m_PrefixList.Alphabetize();
313 CStem::~CStem()
315 delete m_BrokenForm;
316 delete m_WordPtrList;
317 delete m_MyEarleyParser;
320 //-----------------------------------------------------------------
321 // Overloaded operators
322 //-----------------------------------------------------------------
324 void CStem::operator= (const CStem& RHS)
326 CopyParse(RHS);
328 m_BrokenForm = NULL;
329 m_Confidence = RHS.GetConfidence();
330 m_NumberOfStems = RHS.GetNumberOfStems();
331 m_pPrefix = RHS.GetPrefixPtr();
332 m_pPrefixSignature = RHS.GetPrefixSignature();
333 m_PrefixLoc = RHS.GetPrefixLoc();
334 m_pStem = RHS.GetStemPtr();
335 m_pSuffix = RHS.GetSuffixPtr();
336 m_pSuffixSignature = RHS.GetSuffixSignature();
337 m_Regular = RHS.GetRegular();
338 m_SimpleFlag = RHS.GetSimpleFlag();
339 m_Stem2Loc = RHS.GetStem2Loc();
340 m_StemLoc = RHS.GetStemLoc();
341 m_StemType = RHS.GetStemType();
342 m_SuffixLoc = RHS.GetSuffixLoc();
343 m_WordCount = RHS.GetWordCount();
344 m_Phonology_Tier1 = RHS.m_Phonology_Tier1;
345 m_Phonology_Tier2 = RHS.m_Phonology_Tier2;
346 m_Phonology_Tier1_Skeleton = RHS.m_Phonology_Tier1_Skeleton;
347 m_CompoundCount = RHS.GetCompoundCount();
348 m_Affixness = RHS.GetAffixness();
349 m_MyEarleyParser = RHS.GetMyEarleyParser();
350 // m_LengthOfPointerToMe = RHS.GetLengthOfPointerToMe();
352 // m_SuffixList.SetAlphabetical();
353 // m_PrefixList.SetAlphabetical();
358 void CStem::Copy (CStem& RHS)
360 CopyParse(RHS);
362 m_BrokenForm = NULL;
363 m_Confidence = RHS.GetConfidence();
364 m_NumberOfStems = RHS.GetNumberOfStems();
365 m_pPrefix = RHS.GetPrefixPtr();
366 m_pPrefixSignature = RHS.GetPrefixSignature();
367 m_PrefixLoc = RHS.GetPrefixLoc();
368 m_pStem = RHS.GetStemPtr();
369 m_pSuffix = RHS.GetSuffixPtr();
370 m_pSuffixSignature = RHS.GetSuffixSignature();
371 m_Regular = RHS.GetRegular();
372 m_SimpleFlag = RHS.GetSimpleFlag();
373 m_Stem2Loc = RHS.GetStem2Loc();
374 m_StemLoc = RHS.GetStemLoc();
375 m_StemType = RHS.GetStemType();
376 m_SuffixLoc = RHS.GetSuffixLoc();
377 m_WordCount = RHS.GetWordCount();
378 m_MyEarleyParser = RHS.GetMyEarleyParser();
379 // m_SuffixList.SetAlphabetical();
380 // m_PrefixList.SetAlphabetical();
382 m_CompoundCount = RHS.GetCompoundCount();
383 m_Affixness = RHS.GetAffixness();
384 m_LengthOfPointerToMe = RHS.GetLengthOfPointerToMe();
389 //-----------------------------------------------------------------
390 // Other methods
391 //-----------------------------------------------------------------
394 // Copy utility for stems
396 // Parameters:
397 // RHS - the stem to be copied
399 void CStem::CopyStemInformation(CStem* RHS)
401 m_Confidence = RHS->GetConfidence();
402 SetCorpusCount(RHS->GetCorpusCount());
403 m_NumberOfStems = RHS->GetNumberOfStems();
404 m_pPrefix = RHS->GetPrefixPtr();
406 if ( RHS->GetPrefixList() )
408 m_PrefixList = RHS->GetPrefixList();
411 m_PrefixLoc = RHS->GetPrefixLoc();
413 if ( RHS->GetPrefixSignature() )
415 m_pPrefixSignature = RHS->GetPrefixSignature();
418 m_pStem = RHS->GetStemPtr();
419 m_pSuffix = RHS->GetSuffixPtr();
420 m_Regular = RHS->GetRegular();
421 m_SimpleFlag = RHS->GetSimpleFlag();
422 m_Stem2Loc = RHS->GetStem2Loc();
423 m_StemLoc = RHS->GetStemLoc();
424 m_StemType = RHS->GetStemType();
425 m_SuffixLoc = RHS->GetSuffixLoc();
427 if ( RHS->GetSuffixList() )
429 m_SuffixList = RHS->GetSuffixList();
432 if ( RHS->GetSuffixSignature() )
434 m_pSuffixSignature = RHS->GetSuffixSignature();
437 m_WordCount = RHS->GetWordCount();
439 CStem* word;
440 for (int wordno = 0; wordno < RHS->GetWordPtrList()->size(); wordno++)
441 { word = RHS->GetWordPtrList()->at(wordno);
442 m_WordPtrList->append( word );
445 m_CompoundCount = RHS->GetCompoundCount();
446 m_Affixness = RHS->GetAffixness();
447 m_LengthOfPointerToMe = RHS->GetLengthOfPointerToMe();
448 m_MyEarleyParser = RHS->GetMyEarleyParser();
450 //-----------------------------------------------------------------------------------//
451 // Add the prefix 'NULL' to the list of prefixes
452 void CStem::AddNULLPrefix()
453 //-----------------------------------------------------------------------------------//
455 m_WordCount++;
456 QString Null = "NULL";
457 if ( ! m_PrefixList.ContainsNULL() )
459 m_PrefixList.Append (CStringSurrogate(Null.unicode(),0,Null.length()));
463 //-----------------------------------------------------------------------------------//
464 // Add the suffix 'NULL' to the list of suffixes
465 void CStem::AddNULLSuffix()
466 //-----------------------------------------------------------------------------------//
468 m_WordCount++;
469 QString Null = "NULL";
470 if ( ! m_SuffixList.ContainsNULL() )
472 m_SuffixList.Append (CStringSurrogate(Null.unicode(),0,Null.length()));
476 //-----------------------------------------------------------------------------------//
477 // Add a word to the word list
478 bool CStem::AddWord (CStem* pWord)
479 //-----------------------------------------------------------------------------------//
481 if ( m_WordPtrList->indexOf(pWord) < 0 )
483 m_WordPtrList->append (pWord);
484 return TRUE;
486 return FALSE;
490 // Add a prefix to the prefix list
492 // Parameters:
493 // pPrefix - pointer to the prefix to
494 // be added
496 void CStem::AddPrefix (CPrefix* pPrefix)
498 if (! ContainsPrefix (pPrefix)) {
499 // if(!m_PrefixList.Alphabetical()) m_PrefixList.Alphabetize();
500 m_PrefixList.Append (pPrefix->GetKey());
505 // Add a suffix to the suffix list
507 // Parameters:
508 // pSuffix - pointer to the suffix to
509 // be added
511 void CStem::AddSuffix(CSuffix* pSuffix)
513 if ( !ContainsSuffix (pSuffix) ) {
514 // if(!m_SuffixList.Alphabetical()) m_SuffixList.Alphabetize();
515 m_SuffixList.Append (pSuffix->GetKey());
522 // Add a suffix to the suffix list
524 // Parameters:
525 // key - surrogate string of the suffix to
526 // be added
528 void CStem::AddSuffix(const CStringSurrogate& key)
530 if ( !m_SuffixList.Contains (key) ) {
531 // if(!m_SuffixList.Alphabetical()) m_SuffixList.Alphabetize();
532 m_SuffixList.Append (key);
537 // Copy a list of suffixes into the suffix list
539 // Parameters:
540 // pParse - the list of new suffixes
542 void CStem::CopySuffixList(CParse* pParse)
544 for (int i = 1; i <= (int)pParse->Size(); i++)
546 AddSuffix ( pParse->GetPiece(i) );
551 // Find out if the prefix list contains a specific
552 // prefix
554 // Parameters:
555 // Prefix - the prefix in question
557 // Returns:
558 // bool - true if the prefix is in our list
560 bool CStem::ContainsPrefix(CPrefix* Prefix) const
562 if ( m_PrefixList.Contains (Prefix->GetKey()) ) {
563 return true;
564 } else {
565 return false;
569 bool CStem::ContainsPrefix(const CStringSurrogate& Prefix) const
571 if ( m_PrefixList.Contains (Prefix) ) {
572 return true;
573 } else {
574 return false;
578 QString CStem::GetSortingString ()
580 QString sortString = GetSuffixList()->Display(); return sortString;
582 // Add a prefix to the prefix list
584 // Parameters:
585 // Prefix - prefix surrogate to be added be added
587 void CStem::AddPrefix(const CStringSurrogate& Prefix)
589 if ( !ContainsPrefix (Prefix) )
591 m_PrefixList.Append (Prefix);
596 // Find out if the suffix list contains a specific
597 // suffix
599 // Parameters:
600 // Suffix - the suffix to look for
602 // Returns:
603 // bool - true if the suffix is in the list
605 bool CStem::ContainsSuffix(CSuffix* Suffix) const
607 if (m_SuffixList.Contains (Suffix->GetKey() ) ){
608 return true;
609 } else {
610 return false;
615 // Increment the word count
617 // Parameters:
618 // n - amount to increment, default = 1
620 void CStem::IncrementWordCount (int n )
622 m_WordCount += n;
623 Q_ASSERT (m_WordCount > 0);
624 Q_ASSERT (m_WordCount < 1000000);
628 // Get the prefix
630 // Parameters:
631 // Prefix - the parse to put the prefix in
633 void CStem::GetPrefix ( CParse& Prefix ) const
635 if (m_strPrefix.GetKeyLength() > 0) {
636 Prefix = m_strPrefix;
637 return;
639 Prefix = GetPiece( m_PrefixLoc );
643 // Get the suffix
645 // Parameters:
646 // Output - the parse to put the suffix in
648 void CStem::GetSuffix(CParse& Output ) const
650 if (m_strSuffix.GetKeyLength() > 0) {
651 Output = m_strSuffix;
652 return;
654 Output = GetPiece( m_SuffixLoc );
658 // Get the stem
660 // Parameters:
661 // Output - the parse to put the stem in
663 void CStem::GetStem(CParse& Output) const
665 if (m_strStem.GetKeyLength() > 0) {
666 Output = m_strStem;
667 return;
669 Output = GetPiece( m_StemLoc );
675 // Display the type of this stem
677 // Returns:
678 // QString - the type of this stem
680 QString CStem::DisplayStemType() const
682 switch (m_StemType)
684 case NORMAL:
685 { return ""; } // return "Normal":
686 case BIWORD_COMPOUND:
687 { return "2 word compound"; }
688 case MULTIPLE_COMPOUND:
689 { return "Multiple-word compound"; }
690 case POSSIBLE_COMPOUND:
691 { return "Possible compound"; };
692 case NUMBER:
693 { return "Number"; }
694 case UNKNOWN:
695 { return "??"; }
696 case ENDS_IN_HYPHEN:
697 { return "Ends in hyphen"; }
698 case STEM_COMPOUND:
699 { return "Compound"; }
700 case STEM_NORMAL:
701 { return "Stem"; }
702 case STEM_PLUS_SUFFIX:
703 { return "Stem & Suffix"; }
704 case POLYWORD_PIECE:
705 { return "Polyword piece"; }
706 default:
707 { return "???"; }
712 // Merge the prefix and stem
// When both m_StemLoc and m_PrefixLoc are non-zero, MergePieces(m_PrefixLoc)
// is called. m_PrefixLoc is then set to 0 and m_StemLoc to 1.
// NOTE(review): the brace-only lines are missing from this listing, so it is
// not visible whether the two assignments below sit inside the `if` block or
// run unconditionally after it -- confirm against the repository copy.
714 void CStem::ClearPrefixStemSplit()
716 if ( m_StemLoc && m_PrefixLoc)
718 MergePieces (m_PrefixLoc);
721 m_PrefixLoc = 0;
722 m_StemLoc = 1;
728 // Merge the root and suffix
// When m_StemLoc is non-zero and HasASuffix() is true, MergePieces(m_StemLoc)
// is called. m_StemLoc is reset to 0 when it equals 1.
// NOTE(review): the brace-only lines are missing from this listing, so it is
// not visible whether the `m_StemLoc == 1` check is nested inside the first
// `if` or follows it -- confirm against the repository copy.
730 void CStem::ClearRootSuffixSplit()
732 if ( m_StemLoc && HasASuffix() )
734 MergePieces (m_StemLoc);
736 if ( m_StemLoc == 1)
738 m_StemLoc = 0;
742 // TODO : define this function
743 // Get the sorting quantity
745 // Returns:
746 // float - the sorting quantity
748 float CStem::GetSortingQuantity() const
750 Q_ASSERT (0);
751 return 0;
754 /// used in allomorphy code.
755 void CStem::RepairSuffixList(const CMiniLexicon* Lexicon)
757 struct not_implemented { };
758 throw not_implemented();
759 static_cast<void>(Lexicon);
761 // QString NewSuffix;
762 // CSuffix* pNewSuffix;
764 // for (int i = 1; i <= m_SuffixList.Size(); ++i) {
765 // TODO: Get John's help to fix, I don't understand.
766 // CStringSurrogate ssSuffix = m_SuffixList.GetPiece(i);
767 // CSuffix* pOldSuffix = *Lexicon->GetSuffixes() ^= ssSuffix;
769 // SuffixStringTranslation.GetPiece(ssSuffix.SpellOut(), NewSuffix);
770 // pNewSuffix = *Lexicon->GetSuffixes() ^=
771 // }
774 // Detach a specific suffix from the list
776 // Parameters:
777 // pSuffix - the suffix to detach
779 void CStem::DetachSuffix(CSuffix* pSuffix)
781 Q_ASSERT (pSuffix);
782 m_SuffixList.Remove ( pSuffix->GetKey() );
783 pSuffix->RemoveFromStemPtrList ( this );
784 pSuffix->RemoveStemString ( GetKey() );
789 // Detach a specific prefix from the list
791 // Parameters:
792 // pPrefix - the prefix to detach
794 void CStem::DetachPrefix(CPrefix* pPrefix)
796 Q_ASSERT (pPrefix);
797 m_PrefixList.Remove ( pPrefix->GetKey() );
798 pPrefix->RemoveFromStemPtrList ( this );
799 pPrefix->RemoveStemString ( GetKey() );
803 // Remove a word from the word list
805 // Parameters:
806 // pWord - pointer to the word to be removed
808 void CStem::RemoveWordFromWordPtrList(CStem* pWord)
811 m_WordPtrList->remove(pWord);
815 // Replace the old suffix signature and return it
817 // Parameters:
818 // pNewSig - the new signature
820 // Returns:
821 // CSignature* - the old signature
823 CSignature* CStem::ChangeSuffixSignature(CSignature* pNewSig)
825 CSignature* pOldSig = m_pSuffixSignature;
827 if (pOldSig != 0)
828 pOldSig->DetachStem(this, CSignature::eCall_Words);
830 m_pSuffixSignature = pNewSig;
831 return pOldSig;
834 void CStem::OutputStem(Q3TextStream& outf, int index,
835 QMap<QString, QString>* filter)
837 QString confidence;
838 // "# Index | Stem | Confidence | Corpus Count | # of Words | Affixes | Words"
840 outf << " ";
842 outf.setf(2);
843 outf.width(5);
844 outf << index + 1;
845 outf << " ";
847 outf.width(20);
848 outf << Display();
849 outf << " ";
851 outf.width(20);
852 confidence = GetConfidence();
853 if( confidence == "" ) confidence = "NONE";
854 outf << confidence.replace( " ", "_" );
855 outf << " ";
857 outf.unsetf(2);
858 outf.width(12);
859 outf << GetCorpusCount();
860 outf << " ";
862 //outf.width(11);
863 if( GetSuffixSignature() )
865 outf.width(12);
866 outf << m_WordPtrList->size();
867 outf << " ";
869 outf.setf(2);
870 outf << GetSuffixSignature()->Display(' ', filter);
871 outf << " ";
873 else if( GetPrefixSignature() )
875 outf.width(12);
876 outf << GetPrefixSignature()->Size();
877 outf << " ";
879 outf.setf(2);
880 outf << GetPrefixSignature()->Display(' ', filter);
881 outf << " ";
883 else
885 outf << 0;
886 outf << " ";
888 outf.setf(2);
889 outf << "NONE";
890 outf << " ";
893 outf << endl;