1 // Implementation of CBiphoneCollection methods
2 // Copyright © 2009 The University of Chicago
3 #include "BiphoneCollection.h"
5 #include "linguisticamainwindow.h"
10 #include "WordCollection.h"
13 // construction/destruction.
// Constructs a biphone collection from a word collection.
// The Q3Dict<CBiphone> base is constructed with the argument 571 (Q3Dict's
// size hint). The remainder of the member-initialiser list and the body are
// not visible in this excerpt.
// NOTE(review): `words` is presumably stored in m_MyWords, which ListDisplay()
// dereferences -- confirm against the full file.
15 CBiphoneCollection::CBiphoneCollection(CWordCollection
* words
)
16 : Q3Dict
<CBiphone
>(571),
// Open question left by the original author: auto-delete is apparently not
// enabled here, so ownership of the inserted CBiphone objects is unresolved.
33 // XXX. set auto-delete?
// Constructs a biphone collection from a phone collection.
// The Q3Dict<CBiphone> base is constructed with the argument 571 (Q3Dict's
// size hint). The remainder of the member-initialiser list and the body are
// not visible in this excerpt.
// NOTE(review): ListDisplay() dereferences m_MyWords; confirm what this
// overload assigns to m_MyWords, since it receives no word collection.
36 CBiphoneCollection::CBiphoneCollection(CPhoneCollection
* phones
)
37 : Q3Dict
<CBiphone
>(571),
// Open question left by the original author: auto-delete is apparently not
// enabled here, so ownership of the inserted CBiphone objects is unresolved.
54 // XXX. set auto-delete?
// Destructor. The body is not visible in this excerpt.
// NOTE(review): SetSize() allocates m_LogFreqs, m_Freqs, m_MI and m_Count
// with new[]; confirm that they are released with delete[] here.
57 CBiphoneCollection::~CBiphoneCollection()
// Presumably resets the collection to an empty state. The body is not
// visible in this excerpt, so which members are cleared (dictionary entries,
// the n*n tables, cached sums) cannot be stated -- TODO confirm.
65 void CBiphoneCollection::Empty()
// Records one occurrence of the biphone made of phones P1 followed by P2.
//
// The dictionary key is P1->Display() + "." + P2->Display(). The visible
// statements look the key up with find(), create a CBiphone for the key
// (linking it to P1 and P2 via m_MyFirst / m_MySecond), insert it into the
// dictionary, and call IncrementCorpusCount(1).
//
// P1 and P2 are dereferenced without a check, so both must be non-null.
//
// NOTE(review): the conditional structure and the return statement are not
// visible in this excerpt. Presumably the creation block runs only when
// find() returns null, the final IncrementCorpusCount(1) is the "already
// present" branch, and the (new or existing) CBiphone is returned -- confirm.
72 CBiphone
* CBiphoneCollection::Insert (CPhone
* P1
, CPhone
* P2
)
// Build the lookup key "<first>.<second>".
77 biphone
= P1
->Display() + "." + P2
->Display();
81 pBiphone
= find (biphone
);
// Creation path: new entry wired to its two phones, counted once, stored.
85 pBiphone
= new CBiphone(biphone
);
86 pBiphone
->m_MyFirst
= P1
;
87 pBiphone
->m_MySecond
= P2
;
88 pBiphone
->IncrementCorpusCount(1);
89 insert(biphone
, pBiphone
);
// Count bump; presumably the branch taken when the entry already existed.
93 pBiphone
->IncrementCorpusCount(1);
98 CBiphone
* CBiphoneCollection::operator ^= (QString biphone
)
100 return find (biphone
);
103 CBiphone
* CBiphoneCollection::GetBiphone (CPhone
* P1
, CPhone
* P2
)
108 biphone
= P1
->Display() + "." + P2
->Display();
110 return find (biphone
);
// Lookup keyed by a CParse: searches m_Hash for Biphone.Display() and, when
// the key is present, returns the address of the stored value.
// NOTE(review): the statement executed on a miss is not visible in this
// excerpt. This variant uses m_Hash / StringToBiphone, whereas the other
// methods in this file use the inherited Q3Dict find(); presumably it is
// legacy or disabled code -- confirm before relying on it.
114 CBiphone* CBiphoneCollection::operator^= (CParse& Biphone)
117 StringToBiphone::Iterator it = m_Hash.find(Biphone.Display());
119 if ( it == m_Hash.end() )
123 else return &it.data();
// m_Hash-based lookup of the biphone "<Ph1>.<Ph2>": builds the key from the
// two phones' Display() strings and searches m_Hash for it.
// NOTE(review): the miss branch and the assignment of pBiphone are not
// visible in this excerpt. The parameter types match the GetBiphone(CPhone*,
// CPhone*) defined earlier in this file, so both cannot be live definitions;
// presumably this one is disabled (e.g. commented out) -- confirm.
128 CBiphone* CBiphoneCollection::GetBiphone (CPhone* Ph1, CPhone* Ph2)
131 QString Biphone = Ph1->Display() + "." + Ph2->Display();
133 StringToBiphone::Iterator it = m_Hash.find( Biphone );
135 if ( it == m_Hash.end() )
139 else return pBiphone;
// Allocates the four flat n*n tables used by the index-based accessors:
// m_LogFreqs, m_Freqs and m_MI (double) and m_Count (int), each with new[],
// then iterates over all n*n slots.
//
// NOTE(review): the statements before the allocations and the loop body are
// not visible in this excerpt. The accessors index with m_Length as the row
// width, so presumably m_Length is set to n here and the loop zero-fills the
// tables -- confirm. Also confirm whether previously allocated tables are
// released first; otherwise a second call would leak them.
// n*n is evaluated in int arithmetic.
144 void CBiphoneCollection::SetSize(int n
)
150 m_LogFreqs
= new double [n
*n
];
151 m_Freqs
= new double [n
*n
];
152 m_MI
= new double [n
*n
];
153 m_Count
= new int [n
*n
];
// Visit every slot of the n*n tables (body not visible in this excerpt).
155 for (int i
= 0; i
< n
*n
; ++i
) {
164 void CBiphoneCollection::SetAtLogFreq(int FirstIndex
, int SecondIndex
, double ThisLogFreq
)
166 m_LogFreqs
[ FirstIndex
* m_Length
+ SecondIndex
] = ThisLogFreq
;
168 double CBiphoneCollection::GetLogFreq(int FirstIndex
, int SecondIndex
)
170 return m_LogFreqs
[ FirstIndex
* m_Length
+ SecondIndex
];
172 void CBiphoneCollection::SetAtCount(int FirstIndex
, int SecondIndex
, int ThisCount
)
174 m_Count
[ FirstIndex
* m_Length
+ SecondIndex
] = ThisCount
;
176 int CBiphoneCollection::GetCount(int FirstIndex
, int SecondIndex
)
178 return m_Count
[ FirstIndex
* m_Length
+ SecondIndex
];
181 void CBiphoneCollection::SetAtMI(int FirstIndex
, int SecondIndex
, double ThisMI
)
183 m_MI
[ FirstIndex
* m_Length
+ SecondIndex
] = ThisMI
;
187 void CBiphoneCollection::IncrementAtCount (int FirstPhoneIndex
, int SecondPhoneIndex
, int n
)
189 m_Count
[ FirstPhoneIndex
* m_Length
+ SecondPhoneIndex
] += n
;
// Rebuilds the given list view so that it shows every biphone in this
// collection.
//
// Steps visible here: configure the view (no root decoration, sort on column
// index 1), remove all existing columns, add the nine column headers, then
// iterate over the dictionary and let each CBiphone add itself to the view
// via BiphoneListDisplay(). Progress is reported through the lexicon's status
// display, which is reset once the iteration is over.
//
// pView must be non-null; it is dereferenced without a check.
// NOTE(review): m_MyWords is dereferenced unconditionally; confirm it is
// non-null for collections built by the CPhoneCollection constructor.
// NOTE(review): the declaration and initial value of biphone_nr are not
// visible in this excerpt -- presumably an int starting at 0.
192 void CBiphoneCollection::ListDisplay( Q3ListView
* pView
)
194 pView
->setRootIsDecorated( FALSE
);
195 pView
->setSorting(1);
196 // Remove all previous columns
197 while( pView
->columns() ) pView
->removeColumn( 0 );
199 // Add Column headers
200 pView
->addColumn( "Bigram" );
201 pView
->addColumn( "Count" );
202 pView
->addColumn( "Frequency" );
203 pView
->addColumn ( "Log Cond Prob" );
204 pView
->addColumn( "+LogProb" );
205 pView
->addColumn( "MutualInf" );
206 pView
->addColumn( "WMI" );
207 pView
->addColumn( "Cond Prob" );
208 pView
->addColumn( "MI markedness" );
// The status display is reached through the lexicon that owns the words.
210 CLexicon
& lex
= *m_MyWords
->GetLexicon();
211 linguistica::ui::status_user_agent
& status
= lex
.status_display();
// View of this object as its Q3Dict base, used for count() and iteration.
213 Q3Dict
<CBiphone
>& dict
= *this;
// Announce the operation; the progress denominator is the entry count.
214 status
.major_operation
= "Creating biphone list for display";
215 status
.progress
.clear();
216 status
.progress
.set_denominator(dict
.count());
// iter() yields each stored CBiphone in turn and null when exhausted.
219 Q3DictIterator
<CBiphone
> iter(dict
);
220 while (CBiphone
* biphone
= iter()) {
221 biphone
->BiphoneListDisplay(pView
);
// Post-increment: the value reported is the count before this entry.
222 status
.progress
= biphone_nr
++;
// Reset the status display.
225 status
.progress
.clear();
226 status
.major_operation
.clear();
// Sums the m_MI member of the stored biphones into m_SumOfMyMIs. The visible
// code tests m_SumOfMyMIs == 0 before the accumulation loop, so 0 appears to
// serve as the "not yet computed" marker.
//
// NOTE(review): the declaration of pBiphone, the braces and the return
// statement are not visible in this excerpt; presumably the loop is the body
// of the `if` and the function returns m_SumOfMyMIs -- confirm. If so, a
// genuine sum of exactly 0 would be recomputed on every call.
229 double CBiphoneCollection::GetSumOfMyMIs()
231 Q3DictIterator
<CBiphone
> it (*this);
233 if ( m_SumOfMyMIs
== 0 )
// Walk every entry in the dictionary and accumulate its MI value.
235 for ( ; it
.current(); ++it
)
237 pBiphone
= it
.current();
238 m_SumOfMyMIs
+= pBiphone
->m_MI
;
// Computes m_Z_biphones as log2 of the sum, over the stored biphones, of
// 2^(-MI). The visible code tests m_Z_biphones == 0 before the accumulation
// loop, so 0 appears to serve as the "not yet computed" marker.
//
// NOTE(review): the declaration and initial value of `sum`, the declaration
// of pBiphone, the braces and the return statement are not visible in this
// excerpt; presumably `sum` starts at 0, the loop and the log2 assignment
// are inside the `if`, and m_Z_biphones is returned -- confirm.
243 double CBiphoneCollection::ComputeZ_MI()
247 // Currently this is not correctly computing this Z:
248 // To compute it correctly, we need to assign an MI value to the unseen bigrams
249 // and take those into consideration. By leaving them out, we're assuming
250 // an infinite negative MI for those bigrams.
252 Q3DictIterator
<CBiphone
> it (*this);
255 if ( m_Z_biphones
== 0 )
// Accumulate 2^(-MI) for every biphone actually present in the dictionary.
257 for ( ; it
.current(); ++it
)
259 pBiphone
= it
.current();
260 sum
+= pow(2, -1 * pBiphone
->m_MI
);
263 m_Z_biphones
= log2(sum
);