HowManyAreAnalyzed(): use status_user_agent to report progress
[linguistica.git] / cMTModel1.cpp
blob 1439b036be56eecf75aebcd3360526688a11a79f
1 // Implementation of the cMTModel1 class
2 // Copyright © 2009 The University of Chicago
3 #include "cMTModel1.h"
5 #include <QMessageBox>
6 #include "mTVolca.h"
7 #include "cMT.h"
8 #include "Typedefs.h"
10 //////////////////////////////////////////////////////////////////////
11 // Construction/Destruction
12 //////////////////////////////////////////////////////////////////////
14 cMTModel1::cMTModel1(cMT* myMT, int Iterations)
15 : m_myMT(myMT), m_Iterations(Iterations),
16 m_T(),
17 m_softCountOfT(),
18 m_lamdaT() { }
20 cMTModel1::~cMTModel1() { }
22 void cMTModel1::initT()
24 mTVolca* myVolca;
25 int totalAssociatedLanguage2Words;
26 IntToIntToDouble::iterator IntToIntToDoubleIt;
27 IntToDouble::iterator IntToDoubleIt;
28 IntToDouble* oneListForOneLanguage1Word;
29 double uniformProb;
30 int key;
34 myVolca = m_myMT ->m_Volca;
36 m_T = myVolca ->m_fastWordsPairs;
37 m_softCountOfT = myVolca ->m_fastWordsSoftCounts;
40 for ( IntToIntToDoubleIt = m_T.begin(); IntToIntToDoubleIt != m_T.end();IntToIntToDoubleIt++)
42 oneListForOneLanguage1Word = IntToIntToDoubleIt.data();
44 totalAssociatedLanguage2Words = oneListForOneLanguage1Word ->size();
46 uniformProb = 1.0 / totalAssociatedLanguage2Words;
48 for ( IntToDoubleIt = oneListForOneLanguage1Word ->begin(); IntToDoubleIt != oneListForOneLanguage1Word ->end(); IntToDoubleIt++)
50 key = IntToDoubleIt.key();
51 (*oneListForOneLanguage1Word)[key] = uniformProb;
55 for ( IntToIntToDoubleIt = m_softCountOfT.begin(); IntToIntToDoubleIt != m_softCountOfT.end();IntToIntToDoubleIt++)
57 oneListForOneLanguage1Word = IntToIntToDoubleIt.data();
59 for ( IntToDoubleIt = oneListForOneLanguage1Word ->begin(); IntToDoubleIt != oneListForOneLanguage1Word ->end(); IntToDoubleIt++)
61 IntToDoubleIt.data() = 0.0;
66 QMessageBox::information ( NULL, "Linguistica : MT Model1", "Finished InitT", "OK" );
71 void cMTModel1::EMLoops(int numberOfIterations)
73 int loopI;
75 for (loopI =0; loopI < numberOfIterations; loopI++)
77 // E step
78 EStep();
80 //QMessageBox::information ( NULL, "Linguistica : MT Model1", "Finished E-Step", "OK" );
82 // M step
83 MStep();
85 //QMessageBox::information ( NULL, "Linguistica : MT Model1", "Finished M-Step", "OK" );
87 // Clear softcouts for T
88 clearSoftCounts();
91 // Release softcountT memory
92 //releaseSoftCounts();
95 void cMTModel1::clearSoftCounts()
97 IntToIntToDouble::iterator IntToIntToDoubleIt;
98 IntToDouble* oneList;
99 IntToDouble::iterator IntToDoubleIt;
100 int key;
102 for ( IntToIntToDoubleIt = m_softCountOfT.begin(); IntToIntToDoubleIt != m_softCountOfT.end(); IntToIntToDoubleIt++)
104 oneList = IntToIntToDoubleIt.data();
106 for (IntToDoubleIt = oneList ->begin(); IntToDoubleIt != oneList ->end(); IntToDoubleIt++)
108 key = IntToDoubleIt.key();
109 (*oneList)[key] = 0.0;
115 void cMTModel1::EStep()
117 mTVolca* myVolca;
118 int i;
120 myVolca = m_myMT ->m_Volca;
122 m_lamdaT.clear();
123 for ( i=0; i < myVolca ->m_countOfSentences; i++)
125 addSoftCountOfT(i);
130 void cMTModel1::MStep()
132 IntToIntToDouble::iterator IntToIntToDoubleIt;
133 IntToDouble* oneList;
134 IntToDouble::iterator IntToDoubleIt;
135 int language1Id;
136 int language2Id;
137 double oneTotalSoftCount;
138 double oneLamda;
141 for ( IntToIntToDoubleIt = m_softCountOfT.begin(); IntToIntToDoubleIt != m_softCountOfT.end(); IntToIntToDoubleIt++)
143 language1Id = IntToIntToDoubleIt.key();
144 oneLamda = m_lamdaT[language1Id] ;
145 oneList = IntToIntToDoubleIt.data();
148 for (IntToDoubleIt = oneList ->begin(); IntToDoubleIt != oneList ->end(); IntToDoubleIt++)
150 language2Id = IntToDoubleIt.key();
151 oneTotalSoftCount = IntToDoubleIt.data();
152 (*(m_T[language1Id]))[language2Id] = oneTotalSoftCount / oneLamda ;
159 void cMTModel1::addSoftCountOfT(int sentenceId)
161 mTVolca* myVolca;
162 double oneT;
163 double deNumerator;
164 int l,m;
165 int language1WordId;
166 int language2WordId;
167 IntToInt* oneLan1Sentence;
168 IntToInt* oneLan2Sentence;
171 myVolca = m_myMT ->m_Volca;
173 oneLan1Sentence = myVolca ->m_language1Sentences[sentenceId];
174 oneLan2Sentence = myVolca ->m_language2Sentences[sentenceId];
177 for ( m=0; m< static_cast <int> (oneLan2Sentence ->size()); m++) //type cast to fix unsigned-signed warning
179 language2WordId = (*oneLan2Sentence)[m];
181 deNumerator =0;
183 for ( l=0; l < static_cast<int> (oneLan1Sentence ->size()); l++) //type cast: unsigned-signed int comparison
185 language1WordId = (*oneLan1Sentence)[l];
187 oneT = (*(m_T[language1WordId]))[language2WordId];
188 deNumerator += oneT;
191 for ( l=0; l < static_cast <int> (oneLan1Sentence ->size()); l++)
194 language1WordId = (*oneLan1Sentence)[l];
196 oneT = (*(m_T[language1WordId]))[language2WordId] / deNumerator;
198 (*(m_softCountOfT[language1WordId]))[language2WordId] += oneT;
200 if ( m_lamdaT.contains(language1WordId))
202 m_lamdaT[language1WordId] += oneT;
204 else
206 m_lamdaT.insert(language1WordId, oneT);
214 void cMTModel1::releaseSoftCounts()
216 IntToIntToDouble::iterator IntToIntToDoubleIt;
217 IntToDouble* oneList;
219 for ( IntToIntToDoubleIt = m_softCountOfT.begin(); IntToIntToDoubleIt != m_softCountOfT.end(); IntToIntToDoubleIt++)
221 oneList = IntToIntToDoubleIt.data();
223 delete oneList;