HowManyAreAnalyzed(): use status_user_agent to report progress
[linguistica.git] / DLHistory.cpp
blob7ad91aafde211a63f4c979dd12c24efa11abd225
1 // Implementation of description length history display
2 // Copyright © 2009 The University of Chicago
3 #include "DLHistory.h"
5 #include "ui/Status.h"
6 #include "MiniLexicon.h"
7 #include "Lexicon.h"
8 #include "Prefix.h"
9 #include "Signature.h"
10 #include "Suffix.h"
11 #include "Affix.h"
12 #include "Stem.h"
13 #include "SignatureCollection.h"
14 #include "PrefixCollection.h"
15 #include "SuffixCollection.h"
16 #include "WordCollection.h"
17 #include "StemCollection.h"
18 #include "StringFunc.h"
20 CDLHistoryListViewItem::CDLHistoryListViewItem( Q3ListView *parent )
21 : Q3ListViewItem( parent )
25 CDLHistoryListViewItem::CDLHistoryListViewItem( Q3ListView *parent,
26 QString mini_name,
27 CDLHistory* pDLHistory,
28 int index )
29 : Q3ListViewItem( parent, mini_name )
31 m_dlhistory = pDLHistory;
32 m_index = index;
36 CDLHistoryListViewItem::CDLHistoryListViewItem( Q3ListViewItem *parent,
37 QString mini_name,
38 CDLHistory* pDLHistory,
39 int index )
40 : Q3ListViewItem( parent, mini_name )
42 m_dlhistory = pDLHistory;
43 m_index = index;
47 QString CDLHistoryListViewItem::key(int column, bool ascending) const
49 switch (column) {
50 case 0:
51 return QString("%1").arg(m_index, 10);
52 case 1:
53 return m_dlhistory->getMini(m_index);
54 case 2:
55 return m_dlhistory->getRemark(m_index);
56 case 3:
58 double total_dl = 0.0;
59 total_dl += m_dlhistory->getStemDL(m_index);
60 total_dl += m_dlhistory->getAffixDL(m_index);
61 total_dl += m_dlhistory->getSigDL(m_index);
62 return QString("%1").arg(static_cast<int>(
63 double(1000) * total_dl), 10);
65 case 4:
66 return QString("%1").arg(static_cast<int>(
67 double(1000) * m_dlhistory->getUnanalyzedWordDL(m_index)),
68 10);
69 case 6:
70 return QString("%1").arg(static_cast<int>(
71 double(1000) * m_dlhistory->getStemDL(m_index)), 10);
72 case 7:
73 return QString("%1").arg(static_cast<int>(
74 double(1000) * m_dlhistory->getAffixDL(m_index)), 10);
75 case 8:
76 return QString("%1").arg(static_cast<int>(
77 double(1000) * m_dlhistory->getSigDL(m_index)), 10 );
78 case 9:
79 return QString("%1").arg(static_cast<int>(
80 m_dlhistory->getNumberOfStems(m_index)), 10);
81 case 10:
82 return QString("%1").arg(static_cast<int>(
83 m_dlhistory->getNumberOfAnalyzedWords(m_index)), 10);
84 case 11:
85 return QString("%1").arg(static_cast<int>(
86 m_dlhistory->getNumberOfUnanalyzedWords(m_index)), 10);
87 default:
88 return Q3ListViewItem::key(column, ascending);
92 QString CDLHistoryListViewItem::text(int column) const
94 switch (column) {
95 case 0:
96 return QString("%1").arg(m_index);
97 case 1:
98 return m_dlhistory->getMini( m_index );
99 case 2:
100 return m_dlhistory->getRemark( m_index );
101 case 3:
103 double total_dl = 0.0;
104 total_dl += m_dlhistory->getStemDL(m_index);
105 total_dl += m_dlhistory->getAffixDL(m_index);
106 total_dl += m_dlhistory->getSigDL(m_index);
107 total_dl += m_dlhistory->getUnanalyzedWordDL(m_index);
109 return DblToStringWithCommas(total_dl);
111 case 4:
112 return DblToStringWithCommas(m_dlhistory->getUnanalyzedWordDL(
113 m_index));
114 case 6:
115 return DblToStringWithCommas(m_dlhistory->getStemDL(m_index));
116 case 7:
117 return DblToStringWithCommas(m_dlhistory->getAffixDL(m_index));
118 case 8:
119 return DblToStringWithCommas(m_dlhistory->getSigDL(m_index));
120 case 9:
121 return IntToStringWithCommas(m_dlhistory->getNumberOfStems(
122 m_index));
123 case 10:
124 return IntToStringWithCommas(
125 m_dlhistory->getNumberOfAnalyzedWords(m_index));
126 case 11:
127 return IntToStringWithCommas(
128 m_dlhistory->getNumberOfUnanalyzedWords(m_index));
129 default:
130 return Q3ListViewItem::text(column);
134 int CDLHistoryListViewItem::compare(
135 Q3ListViewItem* item, int col, bool asc) const
137 CDLHistoryListViewItem& rhs = *static_cast<CDLHistoryListViewItem*>(item);
138 switch (col) {
139 case 0:
140 return m_index - rhs.m_index;
141 default:
142 return Q3ListViewItem::compare(item, col, asc);
146 // construction/destruction.
148 CDLHistory::CDLHistory()
149 : log(), s(linguistica::ui::ignore_status_updates()) { }
151 CDLHistory::CDLHistory(linguistica::ui::status_user_agent& status_display)
152 : log(), s(status_display) { }
154 CDLHistory::~CDLHistory() { }
156 void CDLHistory::DLHistoryListDisplay( Q3ListView* list )
158 // Remove all previous columns
159 while( list->columns() ) list->removeColumn( 0 );
161 // Add Column headers
162 list->addColumn( "Order",60 );
163 list->addColumn( "Mini-Lexicon",100 );
164 list->addColumn( "Remark",175 );
165 list->addColumn( "Total DL",160 );
166 list->setColumnAlignment( 3, Qt::AlignRight );
167 list->addColumn( "Unanalyzed words DL",130 );
168 list->setColumnAlignment( 4, Qt::AlignRight );
169 list->addColumn( "--" );
170 list->setColumnAlignment( 5, Qt::AlignRight );
171 list->addColumn( "Stems DL",75 );
172 list->setColumnAlignment( 6, Qt::AlignRight );
173 list->addColumn( "Affixes DL",75 );
174 list->setColumnAlignment( 7, Qt::AlignRight );
175 list->addColumn( "Signatures DL",90 );
176 list->setColumnAlignment( 8, Qt::AlignRight );
177 list->addColumn( "Stem count",75 );
178 list->setColumnAlignment( 9, Qt::AlignRight );
179 list->addColumn( "Analyzed words",100 );
180 list->setColumnAlignment( 10, Qt::AlignRight );
181 list->addColumn( "Unanalyzed words",120 );
182 list->setColumnAlignment( 11, Qt::AlignRight );
184 s.major_operation = "Creating description length list for display";
185 s.progress.clear();
186 s.progress.set_denominator(count());
187 for (int i = 0; i < count(); ++i) {
188 static_cast<void>(new CDLHistoryListViewItem(
189 list, getMini(i), this, i));
191 s.progress = i;
193 s.major_operation.clear();
194 s.progress.clear();
197 /// calculate current description length and add it to the stack.
198 /// if count() == std::numeric_limits<int>::max(),
199 /// clear the history first.
200 void CDLHistory::append(QString mini_name, QString remark,
201 CMiniLexicon* mini_ptr)
203 if (log.size() >= log.max_size() ||
204 log.size() >= static_cast<std::size_t>(
205 std::numeric_limits<int>::max()))
206 // too many entries
207 log.clear();
208 if (mini_ptr == 0)
209 return;
210 CMiniLexicon& mini = *mini_ptr;
211 CLexicon& lex = *mini.GetLexicon();
213 const int num_distinct_phonemes = lex.GetNumberOfCharacterTypes();
214 double stems_len = 0.0;
215 int num_stems = 0;
216 if (CStemCollection* stems = mini.GetStems()) {
217 num_stems = stems->GetCount();
218 for (int i = 0; i < num_stems; ++i) {
219 CStem& stem = *stems->GetAt(i);
220 stems_len += stem.ComputeDL(num_distinct_phonemes);
224 double unanalyzed_words_len = 0.0;
225 if (CWordCollection* words = mini.GetWords())
226 for (int i = 0; i < words->GetCount(); ++i) {
227 CStem& word = *words->GetAt(i);
228 if (word.IsAnalyzed())
229 continue;
230 unanalyzed_words_len += word.ComputeDL(
231 num_distinct_phonemes);
234 double affixes_len = 0.0;
235 if (CSuffixCollection* suffixes = mini.GetSuffixes()) {
236 affixes_len += suffixes->GetDL_PhonologicalContent();
237 } else {
238 CPrefixCollection* prefixes = mini.GetPrefixes();
239 Q_ASSERT(prefixes != 0);
240 // XXX. define prefixes->GetDL_PhonologicalContent()
242 for (int i = 0; i < prefixes->GetCount(); ++i) {
243 CAffix& affix = *prefixes->GetAt(i);
244 affixes_len += affix.ComputeDL(num_distinct_phonemes);
248 double signatures_len = 0.0;
249 if (CSignatureCollection* signatures = mini.GetSignatures())
250 for (int i = 0; i < signatures->GetCount(); ++i) {
251 CSignature& sig = *signatures->GetAt(i);
252 signatures_len += sig.ComputeDLofModel(
253 num_distinct_phonemes);
256 int num_unanalyzed_words = -1;
257 const int num_analyzed_words =
258 mini.GetNumberOfAnalyzedWords(num_unanalyzed_words);
260 this->log.push_back(entry());
261 entry& new_entry = log.back();
263 new_entry.stems_len = stems_len;
264 new_entry.unanalyzed_words_len = unanalyzed_words_len;
265 new_entry.affixes_len = affixes_len;
266 new_entry.signatures_len = signatures_len;
268 new_entry.num_stems = num_stems;
269 new_entry.num_unanalyzed_words = num_unanalyzed_words;
270 new_entry.num_analyzed_words = num_analyzed_words;
272 new_entry.remark = remark;
273 new_entry.mini_name = mini_name;