HowManyAreAnalyzed(): use status_user_agent to report progress
[linguistica.git] / PhoneCollection.cpp
blobc2c1c9cfd6ddcf21b80fb94f63da7f55f496b722
1 // Implementation of CPhoneCollection methods
2 // Copyright © 2009 The University of Chicago
3 #include "PhoneCollection.h"
5 #include <Q3TextStream>
6 #include <QIODevice>
7 #include <QFile>
8 #include "ui/Status.h"
9 #include "MonteCarlo.h"
10 #include "Phone.h"
11 #include "Stem.h"
12 #include "BiphoneCollection.h"
13 #include "WordCollection.h"
14 #include "log2.h"
16 CPhoneCollection::CPhoneCollection(CWordCollection* words)
17 : m_MyWords(words),
18 m_MyBiphones(words),
19 m_expMI(),
20 m_expMIFromBoundary(),
21 m_expMIToBoundary() { }
23 double CPhoneCollection::GetSumOfMyMIs()
24 { return m_MyBiphones.GetSumOfMyMIs(); }
26 void CPhoneCollection::CountPhonesAndBiphonesInWord( CStem* pStem, eTier WhichTier)
29 CPhone* pPhone, *prevPhone;
30 prevPhone = NULL;
31 CParse* pThisString;
32 QString Biphone;
33 switch (WhichTier)
35 case (TIER_1):
36 pThisString = pStem->GetPhonology_Tier1();
37 break;
38 case (TIER_1_SKELETON):
39 pThisString = pStem->GetPhonology_Tier1_Skeleton();
40 break;
41 default:
42 pThisString = pStem->GetPhonology_Tier2();
47 if (pThisString->Size()==0) return;
49 for (int phoneno =1; phoneno <= pThisString->Size(); phoneno++)
51 QString temp2 = pThisString->GetPiece(phoneno).Display();
53 if ( phoneno == pThisString->Size() && // these conditions define a # at the end of a string like #dog#
54 pThisString->GetPiece(1).Display() == QString("#") &&
55 pThisString->GetPiece(phoneno).Display() == QString("#")
58 pPhone = *this ^= pThisString->GetPiece(phoneno);
60 else
62 pPhone = *this << pThisString->GetPiece(phoneno);
64 if (phoneno > 1)
65 m_MyBiphones.Insert(prevPhone, pPhone);
66 prevPhone = pPhone;
70 void CPhoneCollection::Normalize()
72 CPhone* pPhone;
73 CBiphone* pBiphone;
74 double MI, Freq;
76 for (int phoneno = 0; phoneno < GetCount(); phoneno++)
78 pPhone = GetAt(phoneno);
79 pPhone->m_Frequency = pPhone->GetCorpusCount() / (double) m_CorpusCount;
80 pPhone->m_LogFreq = -1 * base2log( pPhone->m_Frequency );
84 Q3DictIterator<CBiphone> it(m_MyBiphones);
85 for ( ; it.current(); ++it)
87 pBiphone = it.current();
88 Freq = (double) pBiphone->GetCorpusCount() / m_MyBiphones.GetTotalCount();
89 pBiphone->m_Freq = Freq;
90 pBiphone->m_LogFreq = -1 * base2log ( Freq );
91 pBiphone->m_CondProb = Freq / (double) pBiphone->m_MyFirst->m_Frequency;
92 pBiphone->m_LogCondProb = -1 * base2log ( pBiphone->m_CondProb );
94 MI = base2log ( Freq / ( pBiphone->m_MyFirst->m_Frequency * pBiphone->m_MySecond->m_Frequency ) );
96 pBiphone->m_MI = MI;
97 pBiphone->m_WMI = MI * pBiphone->GetCorpusCount();
98 pBiphone->m_NormalizedMI = pBiphone->m_MI - m_MyBiphones.m_Z_biphones;
100 m_MyBiphones.ComputeZ_MI();
102 for (it.toFirst() ; it.current(); ++it)
104 pBiphone = it.current();
105 pBiphone->m_NormalizedMI =
106 m_MyBiphones.m_Z_biphones - pBiphone->m_MI;
110 void CPhoneCollection::ListDisplay(Q3ListView* pView,
111 linguistica::ui::status_user_agent& status)
113 CPhone* pPhone;
115 pView->setRootIsDecorated(false);
116 pView->setSorting(1);
117 // Remove all previous columns
118 while (pView->columns() != 0)
119 pView->removeColumn(0);
121 // Add Column headers
122 pView->addColumn("Phone");
123 pView->addColumn("Count");
124 pView->addColumn("+LogProb");
125 pView->addColumn("Prob");
127 if (m_SortValidFlag == false)
128 Sort(KEY);
130 // Display all items
131 status.major_operation = "Creating phone list for display";
132 status.progress.clear();
133 status.progress.set_denominator(GetCount());
134 for (int phoneno = 0; phoneno < (int) GetCount(); phoneno++) {
135 pPhone = GetAtSort(phoneno);
136 pPhone->PhoneListDisplay(pView);
137 status.progress = phoneno;
139 status.progress.clear();
140 status.major_operation.clear();
143 void CPhoneCollection::PopulateMonteCarlo( MonteCarlo* pMyMonteCarlo)
145 QString FirstPhone;
146 MonteCarlo* qMonteCarlo;
147 int Size;
148 int NumberOfBigramsFound = 0;
149 CPhone* pPhone, *qPhone;
150 CBiphone* pBiphone;
151 int j;
152 // first, Unigram frequencies:
153 for (int i = 0; i < GetSize(); i++)
155 pPhone = GetAt(i);
156 pMyMonteCarlo->StockDictionary( pPhone->Display(), pPhone->m_Frequency, i );
159 static const QString FileName = "c:\\LxaDeleteMe.txt";
160 QFile file( FileName );
161 Q3TextStream outf( &file );
162 outf.setEncoding( Q3TextStream::Unicode );
164 // Now Bigram frequencies:
165 if (pMyMonteCarlo->m_ModelType == BIGRAM)
167 for (int i = 0; i < GetSize(); i++) // iterate throught the phonemes...
169 pPhone = GetAt(i);
170 FirstPhone = pPhone->Display();
171 Size = GetSize();
172 qMonteCarlo = new MonteCarlo ( Size, FirstPhone );
173 pMyMonteCarlo ->GetMyBigrams()->insert ( FirstPhone, qMonteCarlo ); // this is a QDict within the MonteCarlo, for subMonteCarlos for each phone
174 NumberOfBigramsFound = 0;
176 outf << endl << FirstPhone << endl;
178 for (j = 0; j < Size; j++) // iterate through the phonemes, for the second phoneme of this bigram
180 qPhone = GetAt(j);
181 pBiphone = m_MyBiphones.GetBiphone(pPhone, qPhone);
182 if (pBiphone)
184 qMonteCarlo->StockDictionary( qPhone->Display(), pBiphone->m_CondProb, j);
185 NumberOfBigramsFound++;
188 qMonteCarlo->SetSize ( NumberOfBigramsFound );
189 qMonteCarlo->Normalize();
190 //qMonteCarlo->Dump( &outf );
200 pMyMonteCarlo->Normalize();
207 file.close();
212 void CPhoneCollection::ComputeStringAgreementAndDisagreement(
213 CLParse* string1, CLParse* string2,
214 double& agreement_unigram, double& agreement_bigram,
215 double& disagreement_unigram, double& disagreement_bigram)
217 struct not_implemented { };
218 throw not_implemented();
220 static_cast<void>(string1);
221 static_cast<void>(string2);
222 static_cast<void>(agreement_unigram);
223 static_cast<void>(agreement_bigram);
224 static_cast<void>(disagreement_unigram);
225 static_cast<void>(disagreement_bigram);
227 // QString string1_alpha = string1->GetAlphabetizedForm();
228 // QString string2_alpha = string2->GetAlphabetizedForm();
229 // int i,j;
230 // j = 1;
231 // // first calculate unigram overlap and non-overlap costs.
232 // for (i=1; i <= string1->Size(); i++)
233 // {
234 // if (string1_alpha[i] == string2_alpha[j] )
235 // {
236 // agreement_unigram += ;
237 // }
238 // else if (string1_alpha[i] < string2_alpha[j])
239 // {
240 // disagreement_unigram += ;
241 // if (i==string1->Size()){
242 // break;
243 // }
244 // i++;
245 // }
246 // else
247 // {
248 // disagreement_unigram += ;
249 // if (j==string1->Size()){
250 // break;
251 // }
252 // j++;
253 // }
254 // }
255 // if (i < string1->Size()){
256 // for (; i <= string1->Size(); i++){
257 // disagreement_unigram += ;
258 // }
259 // }
260 // if (j < string1->Size()){
261 // for (; j <= string1->Size(); j++){
262 // disagreement_unigram += ;
263 // }
264 // } // end of calculation of unigram figures.