1 // Implementation of CPhoneCollection methods
2 // Copyright © 2009 The University of Chicago
3 #include "PhoneCollection.h"
5 #include <Q3TextStream>
9 #include "MonteCarlo.h"
12 #include "BiphoneCollection.h"
13 #include "WordCollection.h"
// Constructor taking a pointer to a CWordCollection.
// The visible tail of the member-initializer list value-initializes
// m_expMIFromBoundary and m_expMIToBoundary; the constructor body is empty.
// NOTE(review): the beginning of the initializer list is not visible in this
// view, so what is done with `words` cannot be confirmed from here.
16 CPhoneCollection::CPhoneCollection(CWordCollection
* words
)
20 m_expMIFromBoundary(),
21 m_expMIToBoundary() { }
23 double CPhoneCollection::GetSumOfMyMIs()
24 { return m_MyBiphones
.GetSumOfMyMIs(); }
// Walks the phonological string of one stem and records its phones in this
// collection and its adjacent phone pairs in m_MyBiphones.
//
// pStem     - stem whose phonology string is read.
// WhichTier - tier selector; the visible code obtains the string from
//             GetPhonology_Tier1(), GetPhonology_Tier1_Skeleton() (for
//             TIER_1_SKELETON) or GetPhonology_Tier2().
//
// NOTE(review): the switch header, braces, the else branch and any update of
// prevPhone are not visible in this view; the comments below describe only
// the statements that are shown.
26 void CPhoneCollection::CountPhonesAndBiphonesInWord( CStem
* pStem
, eTier WhichTier
)
29 CPhone
* pPhone
, *prevPhone
;
// Pick the string to scan from the stem (one assignment per tier).
36 pThisString
= pStem
->GetPhonology_Tier1();
38 case (TIER_1_SKELETON
):
39 pThisString
= pStem
->GetPhonology_Tier1_Skeleton();
42 pThisString
= pStem
->GetPhonology_Tier2();
// An empty string contributes nothing.
47 if (pThisString
->Size()==0) return;
// Pieces are addressed from 1 through Size() inclusive.
49 for (int phoneno
=1; phoneno
<= pThisString
->Size(); phoneno
++)
// NOTE(review): temp2 is not used in any statement visible here; it may be
// a debugging aid -- confirm before removing.
51 QString temp2
= pThisString
->GetPiece(phoneno
).Display();
// True only for the last piece of a string whose first and last pieces both
// display as "#".
53 if ( phoneno
== pThisString
->Size() && // these conditions define a # at the end of a string like #dog#
54 pThisString
->GetPiece(1).Display() == QString("#") &&
55 pThisString
->GetPiece(phoneno
).Display() == QString("#")
// The closing "#" case goes through operator^= ...
58 pPhone
= *this ^= pThisString
->GetPiece(phoneno
);
// ... and, presumably in the else branch (not visible), every other piece
// goes through operator<<.
62 pPhone
= *this << pThisString
->GetPiece(phoneno
);
// Record the (previous phone, current phone) pair in the biphone collection.
65 m_MyBiphones
.Insert(prevPhone
, pPhone
);
// Derives frequency-based statistics for every phone in this collection and
// every biphone in m_MyBiphones.
//
// Visible steps:
//   1. For each phone: m_Frequency = corpus count / m_CorpusCount, and
//      m_LogFreq = -1 * base2log(m_Frequency).
//   2. For each biphone: m_Freq, m_LogFreq, m_CondProb (Freq divided by the
//      first phone's frequency), m_LogCondProb, m_WMI (MI times corpus
//      count) and m_NormalizedMI.
//   3. m_MyBiphones.ComputeZ_MI() is called, then a second pass over the
//      biphones reassigns m_NormalizedMI.
//
// NOTE(review): local declarations, braces and the statement that stores MI
// into m_MI are not visible in this view.
70 void CPhoneCollection::Normalize()
// Pass over the phones: relative frequency and its negated base2log.
76 for (int phoneno
= 0; phoneno
< GetCount(); phoneno
++)
78 pPhone
= GetAt(phoneno
);
// The cast to double keeps the division from being done in integers.
79 pPhone
->m_Frequency
= pPhone
->GetCorpusCount() / (double) m_CorpusCount
;
80 pPhone
->m_LogFreq
= -1 * base2log( pPhone
->m_Frequency
);
// First pass over the biphones.
84 Q3DictIterator
<CBiphone
> it(m_MyBiphones
);
85 for ( ; it
.current(); ++it
)
87 pBiphone
= it
.current();
// Relative frequency of this biphone among all biphone tokens.
88 Freq
= (double) pBiphone
->GetCorpusCount() / m_MyBiphones
.GetTotalCount();
89 pBiphone
->m_Freq
= Freq
;
90 pBiphone
->m_LogFreq
= -1 * base2log ( Freq
);
// Biphone frequency divided by the frequency of its first phone.
91 pBiphone
->m_CondProb
= Freq
/ (double) pBiphone
->m_MyFirst
->m_Frequency
;
92 pBiphone
->m_LogCondProb
= -1 * base2log ( pBiphone
->m_CondProb
);
// MI: base2log of the biphone frequency over the product of the two phone
// frequencies.
94 MI
= base2log ( Freq
/ ( pBiphone
->m_MyFirst
->m_Frequency
* pBiphone
->m_MySecond
->m_Frequency
) );
// MI weighted by the biphone's corpus count.
97 pBiphone
->m_WMI
= MI
* pBiphone
->GetCorpusCount();
// NOTE(review): this reads m_Z_biphones before the ComputeZ_MI() call shown
// below, and the value is overwritten (with the opposite sign) in the second
// pass -- confirm whether this assignment is intentional.
98 pBiphone
->m_NormalizedMI
= pBiphone
->m_MI
- m_MyBiphones
.m_Z_biphones
;
100 m_MyBiphones
.ComputeZ_MI();
// Second pass over the biphones, restarting the same iterator.
102 for (it
.toFirst() ; it
.current(); ++it
)
104 pBiphone
= it
.current();
105 pBiphone
->m_NormalizedMI
=
106 m_MyBiphones
.m_Z_biphones
- pBiphone
->m_MI
;
// Fills a Q3ListView with one entry per phone.
//
// pView  - list view to reconfigure and fill; all of its existing columns
//          are removed first.
// status - status/progress object updated while the list is built and
//          cleared afterwards.
//
// NOTE(review): the statement guarded by the m_SortValidFlag check and the
// surrounding braces are not visible in this view; presumably the collection
// is re-sorted there -- confirm.
110 void CPhoneCollection::ListDisplay(Q3ListView
* pView
,
111 linguistica::ui::status_user_agent
& status
)
115 pView
->setRootIsDecorated(false);
116 pView
->setSorting(1);
117 // Remove all previous columns
118 while (pView
->columns() != 0)
119 pView
->removeColumn(0);
121 // Add Column headers
122 pView
->addColumn("Phone");
123 pView
->addColumn("Count");
124 pView
->addColumn("+LogProb");
125 pView
->addColumn("Prob");
127 if (m_SortValidFlag
== false)
// Report progress with one step per phone.
131 status
.major_operation
= "Creating phone list for display";
132 status
.progress
.clear();
133 status
.progress
.set_denominator(GetCount());
// Phones are fetched through GetAtSort(), and each one adds itself to the
// view via PhoneListDisplay().
134 for (int phoneno
= 0; phoneno
< (int) GetCount(); phoneno
++) {
135 pPhone
= GetAtSort(phoneno
);
136 pPhone
->PhoneListDisplay(pView
);
137 status
.progress
= phoneno
;
// Reset the status object once the list has been built.
139 status
.progress
.clear();
140 status
.major_operation
.clear();
// Stocks a MonteCarlo object with phone statistics from this collection.
//
// pMyMonteCarlo - object to fill. Each phone's display string and
//                 m_Frequency are stocked first. When its m_ModelType is
//                 BIGRAM, a new MonteCarlo is created per first phone,
//                 inserted into pMyMonteCarlo->GetMyBigrams(), stocked with
//                 conditional probabilities (m_CondProb) of the biphones
//                 found, resized and normalized. pMyMonteCarlo itself is
//                 normalized at the end.
//
// NOTE(review): the statements that assign pPhone, qPhone and Size, open the
// output file, and guard against a missing biphone are not visible in this
// view; the comments below describe only what is shown.
143 void CPhoneCollection::PopulateMonteCarlo( MonteCarlo
* pMyMonteCarlo
)
146 MonteCarlo
* qMonteCarlo
;
148 int NumberOfBigramsFound
= 0;
149 CPhone
* pPhone
, *qPhone
;
152 // first, Unigram frequencies:
153 for (int i
= 0; i
< GetSize(); i
++)
156 pMyMonteCarlo
->StockDictionary( pPhone
->Display(), pPhone
->m_Frequency
, i
);
// NOTE(review): hard-coded Windows path for the text stream written below;
// looks like a debugging dump -- confirm it is still wanted.
159 static const QString FileName
= "c:\\LxaDeleteMe.txt";
160 QFile
file( FileName
);
161 Q3TextStream
outf( &file
);
162 outf
.setEncoding( Q3TextStream::Unicode
);
164 // Now Bigram frequencies:
165 if (pMyMonteCarlo
->m_ModelType
== BIGRAM
)
167 for (int i
= 0; i
< GetSize(); i
++) // iterate through the phonemes...
170 FirstPhone
= pPhone
->Display();
// NOTE(review): raw `new`; the pointer is handed to GetMyBigrams() on the
// next statement, which presumably takes ownership -- confirm.
172 qMonteCarlo
= new MonteCarlo ( Size
, FirstPhone
);
173 pMyMonteCarlo
->GetMyBigrams()->insert ( FirstPhone
, qMonteCarlo
); // this is a QDict within the MonteCarlo, for subMonteCarlos for each phone
// Restart the per-first-phone count of biphones.
174 NumberOfBigramsFound
= 0;
176 outf
<< endl
<< FirstPhone
<< endl
;
178 for (j
= 0; j
< Size
; j
++) // iterate through the phonemes, for the second phoneme of this bigram
181 pBiphone
= m_MyBiphones
.GetBiphone(pPhone
, qPhone
);
// Stock the second phone with the biphone's conditional probability.
184 qMonteCarlo
->StockDictionary( qPhone
->Display(), pBiphone
->m_CondProb
, j
);
185 NumberOfBigramsFound
++;
// Size the sub-model to the number of biphones counted, then normalize it.
188 qMonteCarlo
->SetSize ( NumberOfBigramsFound
);
189 qMonteCarlo
->Normalize();
190 //qMonteCarlo->Dump( &outf );
200 pMyMonteCarlo
->Normalize();
// Placeholder: as visible here, this function does no computation. It
// defines a local empty struct `not_implemented` and throws an instance of
// it.
//
// string1, string2                          - the two parses to compare.
// agreement_unigram, agreement_bigram,
// disagreement_unigram, disagreement_bigram - output references; none is
//                                             written by the visible code.
//
// NOTE(review): the static_cast<void> statements follow the throw, so they
// look unreachable; presumably they exist only to silence unused-parameter
// warnings -- confirm.
212 void CPhoneCollection::ComputeStringAgreementAndDisagreement(
213 CLParse
* string1
, CLParse
* string2
,
214 double& agreement_unigram
, double& agreement_bigram
,
215 double& disagreement_unigram
, double& disagreement_bigram
)
217 struct not_implemented
{ };
218 throw not_implemented();
220 static_cast<void>(string1
);
221 static_cast<void>(string2
);
222 static_cast<void>(agreement_unigram
);
223 static_cast<void>(agreement_bigram
);
224 static_cast<void>(disagreement_unigram
);
225 static_cast<void>(disagreement_bigram
);
227 // QString string1_alpha = string1->GetAlphabetizedForm();
228 // QString string2_alpha = string2->GetAlphabetizedForm();
231 // // first calculate unigram overlap and non-overlap costs.
232 // for (i=1; i <= string1->Size(); i++)
234 // if (string1_alpha[i] == string2_alpha[j] )
236 // agreement_unigram += ;
238 // else if (string1_alpha[i] < string2_alpha[j])
240 // disagreement_unigram += ;
241 // if (i==string1->Size()){
248 // disagreement_unigram += ;
249 // if (j==string1->Size()){
255 // if (i < string1->Size()){
256 // for (; i <= string1->Size(); i++){
257 // disagreement_unigram += ;
260 // if (j < string1->Size()){
261 // for (; j <= string1->Size(); j++){
262 // disagreement_unigram += ;
264 // } // end of calculation of unigram figures.