CMiniLexicon::FindMajorSignatures(): use log file routines
[linguistica.git] / Affix.cpp
blob3b74a3a6e1b8d64c5659730b8c87833cb7aed013
// Implementation of CAffix methods
// Copyright © 2009 The University of Chicago
3 #include "Affix.h"
4 #include "Stem.h"
5 #include "MiniLexicon.h"
6 #include "Lexicon.h"
7 #include "Typedefs.h"
8 #include "log2.h"
// Construction/destruction.
12 /// skeleton of an affix object owned by mini.
13 /// one should probably initialize the underlying CLParse object with
14 /// CLParse methods afterwards.
15 CAffix::CAffix(CMiniLexicon* mini)
16 : CLParse(mini),
17 m_UseCount(0),
18 m_StemString(),
19 m_StemPtrList(new QList<CStem*>),
20 m_CompressedLength(0.0),
21 m_Deletees(), // initialized below
22 m_Morphees(), // initialized below
23 m_LengthOfPointerToMe(0.0),
24 m_PhonologicalInformationContent(0.0),
25 m_UnigramLogProb(0.0),
26 m_BigramLogProb(0.0)
28 m_Deletees.Alphabetize();
29 m_Morphees.Alphabetize();
32 /// affix object owned by mini, with underlying string str
33 /// Copies in the string str, so it’s okay if str becomes invalid later.
34 CAffix::CAffix(const CStringSurrogate& str,
35 CMiniLexicon* mini)
36 : CLParse(str, mini),
37 m_UseCount(0),
38 m_StemString(),
39 m_StemPtrList(new QList<CStem*>),
40 m_CompressedLength(0.0),
41 m_Deletees(), // initialized below
42 m_Morphees(), // initialized below
43 m_LengthOfPointerToMe(0.0),
44 m_PhonologicalInformationContent(0.0),
45 m_UnigramLogProb(0.0),
46 m_BigramLogProb(0.0)
48 m_Deletees.Alphabetize();
49 m_Morphees.Alphabetize();
52 CAffix::~CAffix() { delete m_StemPtrList; }
//-------------------------------------------
// Public mutator/accessor methods
//-------------------------------------------
59 // Increment the count of how many times this
61 // Parameters:
62 // n - the amount to increment
64 /**
65 Increment the number of stems associated with this CAffix.
67 void CAffix::IncrementUseCount (int n)
69 m_UseCount += n;
70 Q_ASSERT ( m_UseCount > 0);
71 Q_ASSERT ( m_UseCount < 1000000 );
75 // Append a new stem to the string without
76 // adding it to the list of stem pointers
78 // Parameters:
79 // Stem - the new stem
81 /**
82 Append a new CStringSurrogate stem representation to the list of stems. <kbd>Stem</kbd>
83 is a surrogate of the string to be added.
85 void CAffix::AppendToStemString(const CStringSurrogate& Stem)
88 if( !m_StemString.Contains( Stem ) ) m_StemString.Append(Stem);
92 // Add a stem to the list of stems and append
93 // to the stem string
95 // Parameters:
96 // pStem - the stem to be added
98 /**
99 Add a new CStem pointer to the list of stems. <kbd>pStem</kbd> is a pointer
100 to the stem that will be added.
102 void CAffix::AddStem(CStem* pStem)
104 if( ! m_StemPtrList->contains(pStem) )
106 m_StemPtrList->append(pStem);
107 AppendToStemString( pStem->GetKey() );
112 // Remove a stem from the list of stem pointers
114 // Parameters:
115 // pStemToRemove - pointer to the stem that
116 // will be removed from the list
119 Remove a CStem pointer from the list of stems. <kbd>pStemToRemove</kbd> is
120 a pointer to the stem that will be removed.
122 void CAffix::RemoveFromStemPtrList(CStem* pStemToRemove)
124 m_StemPtrList->removeAll(pStemToRemove);
128 // Remove a stem from the string without attempting
129 // to remove it from the pointer list
131 // Parameters:
132 // ssStem - the stem to be removed
135 Remove a CStringSurrogate stem representation from the list of stems. <kbd>ssStem</kbd>
136 is the surrogate string to be removed.
138 void CAffix::RemoveStemString(const CStringSurrogate& ssStem )
140 m_StemString.Remove( ssStem );
145 Add a CStringSurrogate deletee. Deletees are substrings that are deleted
146 from an allomorph. <kbd>ssDeletee</kbd> is a surrogate representation
147 of the deletee.
149 void CAffix::AddDeletee( CStringSurrogate ssDeletee )
151 if( !m_Deletees.Contains( ssDeletee ) )
153 m_Deletees.Append( ssDeletee );
158 Add a CStringSurrogate morphee pair. Morphees are substrings that are
159 different in an allomorph. <kbd>y</kbd> is the substring that is changed.
160 <kbd>i</kbd> is the substring <i>y</i> is changed into.
162 void CAffix::AddMorphee ( CStringSurrogate y, CStringSurrogate i )
164 QString New;
165 New = y.Display() + "\\" + i.Display();
166 if( !m_Morphees.Contains( New ) )
168 m_Morphees.Append (New);
171 QString CAffix::ExpressAffix( bool ExpressDeletees )
173 QString Outstring;
174 QString ltSq = "<",
175 rtSq = ">",
176 ltCu = "{",
177 rtCu = "}";
179 if ( ExpressDeletees && m_Deletees.GetKeyLength() > 0 )
181 Outstring += ltSq;
182 Outstring += m_Deletees.Display();
183 Outstring += rtSq;
185 if ( ExpressDeletees && m_Morphees.GetKeyLength() > 0 )
187 QString strMorphees = m_Morphees.Display();
188 CSS ssMorphees( strMorphees );
189 Outstring +=ltCu ;
190 Outstring += ssMorphees.Display();
191 Outstring += rtCu ;
194 Outstring += GetKey().Display() ;
195 return Outstring;
199 double CAffix::GetLengthOfPointerToMe() // problem here jan 1 2010
201 if (m_LengthOfPointerToMe <= 0)
203 bool CORPUS_BASED_AFFIX_COUNT = m_pMyMini->GetIntParameter( "SignatureDL\\CorpusBasedAffixCount", 0 );
204 if ( m_pMyMini->GetCorpusCount() > 0 && GetCorpusCount() > 0 ) {
205 if (CORPUS_BASED_AFFIX_COUNT)
207 m_LengthOfPointerToMe = base2log ( m_pMyMini->GetCorpusCount () / GetCorpusCount() );
209 else
211 m_LengthOfPointerToMe = base2log ( m_pMyMini->GetCorpusCount () / GetCorpusCount() );
215 return m_LengthOfPointerToMe;
219 double CAffix::GetPhonologicalInformationContent()
221 if (m_PhonologicalInformationContent == 0)
223 bool CORPUS_BASED_AFFIX_COUNT = m_pMyMini->GetIntParameter( "SignatureDL\\CorpusBasedAffixCount", 0 );
224 if (CORPUS_BASED_AFFIX_COUNT )
226 m_PhonologicalInformationContent = base2log ( m_pMyMini->GetCorpusCount () / GetCorpusCount() );
228 else
230 m_PhonologicalInformationContent = base2log ( m_pMyMini->GetCorpusCount () / GetCorpusCount() );
233 return m_PhonologicalInformationContent;
237 void CAffix::CalculatePhonologicalInformationContent ( CLexicon* Lexicon )
239 if (m_BigramLogProb > 0)
241 m_PhonologicalInformationContent = m_BigramLogProb;
243 else
245 m_PhonologicalInformationContent = ComputeDL( Lexicon->GetNumberOfCharacterTypes() );
247 return;