HowManyAreAnalyzed(): use status_user_agent to report progress
[linguistica.git] / BiphoneCollection.cpp
blob6c776ac23000ce128fdf2b3ca5df5f52b365db59
1 // Implementation of CBiphoneCollection methods
2 // Copyright © 2009 The University of Chicago
3 #include "BiphoneCollection.h"
4 #include <cmath>
5 #include "linguisticamainwindow.h"
6 #include "ui/Status.h"
7 #include "Lexicon.h"
8 #include "Biphone.h"
9 #include "Phone.h"
10 #include "WordCollection.h"
11 #include "log2.h"
13 // construction/destruction.
15 CBiphoneCollection::CBiphoneCollection(CWordCollection* words)
16 : Q3Dict<CBiphone>(571),
17 m_PtrArray(),
18 m_CountSortArray(),
19 m_MISortArray(),
20 m_WMISortArray(),
21 m_SortStyle(WMI),
22 m_Phones(),
23 m_MyWords(words),
24 m_Count(),
25 m_LogFreqs(),
26 m_Freqs(),
27 m_Length(0),
28 m_MI(),
29 m_SumOfMyMIs(0.0),
30 m_TotalCount(0),
31 m_Z_biphones(0.0)
33 // XXX. set auto-delete?
36 CBiphoneCollection::CBiphoneCollection(CPhoneCollection* phones)
37 : Q3Dict<CBiphone>(571),
38 m_PtrArray(),
39 m_CountSortArray(),
40 m_MISortArray(),
41 m_WMISortArray(),
42 m_SortStyle(MI),
43 m_Phones(phones),
44 m_MyWords(),
45 m_Count(),
46 m_LogFreqs(),
47 m_Freqs(),
48 m_Length(0),
49 m_MI(),
50 m_SumOfMyMIs(0.0),
51 m_TotalCount(0),
52 m_Z_biphones(0.0)
54 // XXX. set auto-delete?
57 CBiphoneCollection::~CBiphoneCollection()
59 delete[] m_MI;
60 delete[] m_Freqs;
61 delete[] m_LogFreqs;
62 delete[] m_Count;
65 void CBiphoneCollection::Empty()
67 clear();
68 m_TotalCount = 0;
69 m_Z_biphones = 0;
72 CBiphone* CBiphoneCollection::Insert (CPhone* P1, CPhone* P2)
75 CBiphone* pBiphone;
76 QString biphone;
77 biphone = P1->Display() + "." + P2->Display();
79 m_TotalCount++;
81 pBiphone = find (biphone);
83 if ( ! pBiphone )
85 pBiphone = new CBiphone(biphone);
86 pBiphone->m_MyFirst = P1;
87 pBiphone->m_MySecond = P2;
88 pBiphone->IncrementCorpusCount(1);
89 insert(biphone, pBiphone);
91 else
93 pBiphone->IncrementCorpusCount(1);
95 return pBiphone;
98 CBiphone* CBiphoneCollection::operator ^= (QString biphone)
100 return find (biphone);
103 CBiphone* CBiphoneCollection::GetBiphone (CPhone* P1, CPhone* P2)
107 QString biphone;
108 biphone = P1->Display() + "." + P2->Display();
110 return find (biphone);
// Looks up a biphone in m_Hash by the display string of the given parse.
// Returns NULL when the key is absent, otherwise the address of the
// stored value.
// NOTE(review): m_Hash is not initialized by either constructor in this
// file, so this is presumably disabled legacy code whose comment
// delimiters are not visible here -- confirm before editing or reviving.
114 CBiphone* CBiphoneCollection::operator^= (CParse& Biphone)
117 StringToBiphone::Iterator it = m_Hash.find(Biphone.Display());
119 if ( it == m_Hash.end() )
121 return NULL;
123 else return &it.data();
// m_Hash-based variant of GetBiphone(): builds the key
// "<Ph1 display>.<Ph2 display>" and searches m_Hash for it, returning
// NULL when the key is absent.
// NOTE(review): this has the same signature as the Q3Dict-based
// GetBiphone() defined earlier in the file, so it is presumably disabled
// legacy code -- confirm before editing or reviving.
// NOTE(review): pBiphone is never assigned in the visible lines, so the
// "found" branch would return an indeterminate pointer; if this code is
// revived it should return the entry located by `it` instead.
128 CBiphone* CBiphoneCollection::GetBiphone (CPhone* Ph1, CPhone* Ph2)
130 CBiphone* pBiphone;
131 QString Biphone = Ph1->Display() + "." + Ph2->Display();
133 StringToBiphone::Iterator it = m_Hash.find( Biphone );
135 if ( it == m_Hash.end() )
137 return NULL;
139 else return pBiphone;
144 void CBiphoneCollection::SetSize(int n)
146 delete[] m_Count;
147 delete[] m_MI;
148 delete[] m_Freqs;
149 delete[] m_LogFreqs;
150 m_LogFreqs = new double [n*n];
151 m_Freqs = new double [n*n];
152 m_MI = new double [n*n];
153 m_Count = new int [n*n];
155 for (int i = 0; i < n*n; ++i) {
156 m_LogFreqs[i] = 0;
157 m_Freqs[i] = 0;
158 m_MI[i] = 0;
159 m_Count[i] = 0;
161 m_Length = n;
164 void CBiphoneCollection::SetAtLogFreq(int FirstIndex, int SecondIndex, double ThisLogFreq)
166 m_LogFreqs[ FirstIndex * m_Length + SecondIndex ] = ThisLogFreq;
168 double CBiphoneCollection::GetLogFreq(int FirstIndex, int SecondIndex)
170 return m_LogFreqs[ FirstIndex * m_Length + SecondIndex ];
172 void CBiphoneCollection::SetAtCount(int FirstIndex, int SecondIndex, int ThisCount)
174 m_Count[ FirstIndex * m_Length + SecondIndex ] = ThisCount;
176 int CBiphoneCollection::GetCount(int FirstIndex, int SecondIndex)
178 return m_Count[ FirstIndex * m_Length + SecondIndex ];
181 void CBiphoneCollection::SetAtMI(int FirstIndex, int SecondIndex, double ThisMI)
183 m_MI[ FirstIndex * m_Length + SecondIndex ] = ThisMI;
187 void CBiphoneCollection::IncrementAtCount (int FirstPhoneIndex, int SecondPhoneIndex, int n)
189 m_Count[ FirstPhoneIndex * m_Length + SecondPhoneIndex ] += n;
192 void CBiphoneCollection::ListDisplay( Q3ListView* pView )
194 pView->setRootIsDecorated( FALSE );
195 pView->setSorting(1);
196 // Remove all previous columns
197 while( pView->columns() ) pView->removeColumn( 0 );
199 // Add Column headers
200 pView->addColumn( "Bigram" );
201 pView->addColumn( "Count" );
202 pView->addColumn( "Frequency" );
203 pView->addColumn ( "Log Cond Prob" );
204 pView->addColumn( "+LogProb" );
205 pView->addColumn( "MutualInf" );
206 pView->addColumn( "WMI" );
207 pView->addColumn( "Cond Prob" );
208 pView->addColumn( "MI markedness" );
210 CLexicon& lex = *m_MyWords->GetLexicon();
211 linguistica::ui::status_user_agent& status = lex.status_display();
213 Q3Dict<CBiphone>& dict = *this;
214 status.major_operation = "Creating biphone list for display";
215 status.progress.clear();
216 status.progress.set_denominator(dict.count());
218 int biphone_nr = 0;
219 Q3DictIterator<CBiphone> iter(dict);
220 while (CBiphone* biphone = iter()) {
221 biphone->BiphoneListDisplay(pView);
222 status.progress = biphone_nr++;
225 status.progress.clear();
226 status.major_operation.clear();
229 double CBiphoneCollection::GetSumOfMyMIs()
231 Q3DictIterator<CBiphone> it (*this);
232 CBiphone* pBiphone;
233 if ( m_SumOfMyMIs == 0 )
235 for ( ; it.current(); ++it)
237 pBiphone = it.current();
238 m_SumOfMyMIs += pBiphone->m_MI;
241 return m_SumOfMyMIs;
243 double CBiphoneCollection::ComputeZ_MI()
245 using std::pow;
247 // Currently this is not correctly computing this Z:
248 // To compute it correctly, we need to assign an MI value to the unseen bigrams
249 // and take those into consideration. By leaving them out, we're assuming
250 // an infinite negative MI for those bigrams.
251 m_Z_biphones = 0;
252 Q3DictIterator<CBiphone> it (*this);
253 CBiphone* pBiphone;
254 double sum = 0;
255 if ( m_Z_biphones == 0 )
257 for ( ; it.current(); ++it)
259 pBiphone = it.current();
260 sum += pow(2, -1 * pBiphone->m_MI);
263 m_Z_biphones = log2(sum);
264 return m_Z_biphones;