HowManyAreAnalyzed(): use status_user_agent to report progress
[linguistica.git] / SuffixCollection.cpp
blob316d9fcd8a99329982153c8129570dc125753843
1 // Implementation of CSuffixCollection methods
2 // Copyright © 2009 The University of Chicago
3 #include "SuffixCollection.h"
5 #include <Q3TextStream>
6 #include <QIODevice>
7 #include <QFile>
8 #include "ui/Status.h"
9 #include "MiniLexicon.h"
10 #include "Lexicon.h"
11 #include "Suffix.h"
12 #include "log2.h"
14 //////////////////////////////////////////////////////////////////////
15 // Construction/Destruction
16 //////////////////////////////////////////////////////////////////////
18 CSuffixCollection::CSuffixCollection(CMiniLexicon* Lex)
20 // do the creation of TCollection with ptr to Lexicon and name of class
21 m_pMiniLex = Lex;
22 if( m_pMiniLex ) m_pLexicon = m_pMiniLex->GetLexicon();
26 CSuffixCollection::CSuffixCollection( )
28 m_pMiniLex = NULL;
29 m_pLexicon = NULL;
33 CSuffixCollection::~CSuffixCollection()
35 if( m_pLexicon )
37 CSuffix* pSuffix;
38 for( int i = 0; i < GetCount(); i++ )
40 pSuffix = GetAt(i);
42 m_pLexicon->RemoveSuffix( pSuffix );
47 void CSuffixCollection::ListDisplay(
48 Q3ListView* pView, QMap<QString, QString>* filter,
49 linguistica::ui::status_user_agent& status,
50 bool ExpressDeletees)
52 pView->setRootIsDecorated(false);
53 pView->setSorting(1);
55 // Clean it out first.
56 while (pView->columns() != 0)
57 pView->removeColumn(0);
59 // Add Column headers
60 pView->addColumn("Prefixes");
61 pView->addColumn("Descr. Length");
62 pView->addColumn("Length of Ptr");
63 pView->addColumn("Corpus Count");
64 pView->addColumn("Use Count");
65 pView->addColumn("Stems");
67 pView->setColumnAlignment(0, Qt::AlignCenter);
68 pView->setColumnAlignment(1, Qt::AlignRight);
69 pView->setColumnAlignment(2, Qt::AlignRight);
70 pView->setColumnAlignment(3, Qt::AlignCenter);
71 pView->setColumnAlignment(4, Qt::AlignCenter);
72 pView->setColumnAlignment(5, Qt::AlignLeft);
74 // Column three gets really wide, so
75 // limit the width to 100.
76 pView->setColumnWidthMode(3, Q3ListView::Manual);
77 pView->setColumnWidth(3, 100);
79 status.major_operation = "Creating suffix list for display";
80 status.progress.clear();
82 if (m_SortValidFlag == false)
83 Sort(COUNT);
85 // Display each item
86 status.progress.set_denominator(GetCount());
87 for (int i = 0; i < (int) GetCount(); i++) {
88 GetAtSort(i)->ListDisplay(pView, filter, ExpressDeletees,
89 m_pLexicon->GetNumberOfCharacterTypes());
90 status.progress = i;
92 status.progress.clear();
93 status.major_operation.clear();
96 CSuffix* CSuffixCollection::operator<< ( CStringSurrogate& Suffix )
98 CSuffix* pSuffix;
100 pSuffix = AddToCollection( Suffix );
102 return pSuffix;
106 CSuffix* CSuffixCollection::operator<< ( CParse* Suffix )
108 CSuffix* pSuffix;
110 pSuffix = AddToCollection (*Suffix);
112 return pSuffix;
116 CSuffix* CSuffixCollection::operator<< ( CParse& Suffix )
118 CSuffix* pSuffix;
120 pSuffix = AddToCollection (Suffix);
122 return pSuffix;
125 CSuffix* CSuffixCollection::operator<< ( QString Suffix )
127 CSuffix* pSuffix;
129 CStringSurrogate cssSuffix( Suffix );
131 pSuffix = AddToCollection( cssSuffix );
133 return pSuffix;
137 void CSuffixCollection::AddPointer( CSuffix* pSuffix )
139 TCollection<CSuffix>::AddPointer( pSuffix );
141 if( m_pLexicon ) m_pLexicon->InsertSuffix( pSuffix );
145 CSuffix* CSuffixCollection::AddToCollection( CParse& Suffix )
147 CSuffix* pSuffix = TCollection<CSuffix>::AddToCollection( Suffix );
149 if( m_pLexicon ) m_pLexicon->InsertSuffix( pSuffix );
151 return pSuffix;
155 CSuffix* CSuffixCollection::AddToCollection( CStringSurrogate& Suffix )
157 CSuffix* pSuffix = TCollection<CSuffix>::AddToCollection( Suffix );
159 if( m_pLexicon ) m_pLexicon->InsertSuffix( pSuffix );
161 return pSuffix;
165 void CSuffixCollection::Empty()
167 if( m_pLexicon )
169 CSuffix* pSuffix;
170 for( int suffixno = 0; suffixno < GetCount(); suffixno++ )
172 pSuffix = GetAt(suffixno);
173 m_pLexicon->RemoveSuffix( pSuffix );
176 TCollection<CSuffix>::Empty();
180 void CSuffixCollection::RemoveAll()
182 if( m_pLexicon )
184 CSuffix* pSuffix;
186 for( int suffixno = 0; suffixno < GetCount(); suffixno++ )
188 pSuffix = GetAt(suffixno);
189 m_pLexicon->RemoveSuffix( pSuffix );
192 TCollection<CSuffix>::RemoveAll();
196 bool CSuffixCollection::Remove( CSuffix* pSuffix )
198 if( m_pLexicon ) m_pLexicon->RemoveSuffix( pSuffix );
200 return TCollection<CSuffix>::Remove( pSuffix );
204 bool CSuffixCollection::RemoveMember( CSuffix* pSuffix )
206 if( m_pLexicon ) m_pLexicon->RemoveSuffix( pSuffix );
208 return TCollection<CSuffix>::RemoveMember( pSuffix );
212 bool CSuffixCollection::RemoveMember( CStringSurrogate& Suffix )
214 CSuffix* pSuffix = (CSuffix*)Find1( Suffix )->Get_T_Pointer();
216 if( m_pLexicon ) m_pLexicon->RemoveSuffix( pSuffix );
218 return TCollection<CSuffix>::RemoveMember( Suffix );
222 bool CSuffixCollection::RemoveMember( CStringSurrogate& Suffix, bool b )
224 CSuffix* pSuffix = (CSuffix*)Find1( Suffix )->Get_T_Pointer();
226 if( m_pLexicon ) m_pLexicon->RemoveSuffix( pSuffix );
228 return TCollection<CSuffix>::RemoveMember( Suffix, b );
231 int CSuffixCollection::GetTotalUseCount()
232 { if (m_TotalUseCount <= 0)
233 CalculateTotalUseCount();
234 return m_TotalUseCount;
237 void CSuffixCollection::CalculateTotalUseCount()
239 m_TotalUseCount = 0;
240 for (int suffixno = 0; suffixno < GetCount(); suffixno++)
241 { m_TotalUseCount += GetAt(suffixno)->GetUseCount();
245 void CSuffixCollection::DeleteMarkedMembers()
247 if ( m_DeletionArray == NULL ) { return; }
249 int count = GetCount();
250 for (int suffixno = 0; suffixno < count; suffixno++)
252 if ( m_DeletionArray[suffixno] == 1 )
254 if( m_pLexicon ) m_pLexicon->RemoveSuffix( m_PointerArray[suffixno] );
257 TCollection<CSuffix>::DeleteMarkedMembers();
261 void CSuffixCollection::OutputSuffixes( QString FileName )
263 QFile file( FileName );
265 if( file.open( QIODevice::WriteOnly ) )
267 Q3TextStream outf( &file );
268 outf.setEncoding( Q3TextStream::Unicode );
270 outf << "# Suffix Count" << endl;
271 outf << "# ------------" << endl;
272 outf << " " << GetCount() << endl << endl;
274 outf << "# Suffix | Use Count | Corpus Count | Index | " << endl;
275 outf << "# ------------------------------------------------- " << endl;
277 Sort( CORPUSCOUNT );
278 for (int suffixno = 0; suffixno < (int)GetCount(); suffixno++)
280 GetAtSort(suffixno)->OutputSuffix( outf );
283 file.close();
288 void CSuffixCollection::ReadSuffixFile (QString FileName)
290 CSuffix* pSuffix;
291 QFile file(FileName);
292 QString Buffer, Key;
293 int Index,
294 size,
295 UseCount,
296 CorpusCount,
297 LineCount = 0;
299 if( file.exists() && file.open( QIODevice::ReadOnly ) )
301 Q3TextStream inf(&file);
302 inf.setEncoding ( Q3TextStream::Locale );
304 Buffer = inf.readLine();
305 Q_ASSERT( Buffer[0] == '#' );
307 Buffer = inf.readLine();
308 Q_ASSERT( Buffer[0] == '#' );
310 inf >> size;
312 Buffer = inf.readLine(); // end of size line
313 Q_ASSERT( Buffer.length() == 0 );
315 Buffer = inf.readLine(); // blank line
316 Q_ASSERT( Buffer.length() == 0 );
318 Buffer = inf.readLine();
319 Q_ASSERT( Buffer[0] == '#' );
321 Buffer = inf.readLine();
322 Q_ASSERT( Buffer[0] == '#' );
324 while( !inf.atEnd() && LineCount < size )
326 LineCount++;
328 inf >> Key;
329 inf >> UseCount;
330 inf >> CorpusCount;
331 inf >> Index;
333 // Filter all sequences that should be
334 // analyzed as one character
335 Key = Filter( m_pLexicon->GetInFilter(), Key );
337 pSuffix = *this << Key ;
338 pSuffix->IncrementUseCount( UseCount );
339 pSuffix->IncrementCorpusCount( CorpusCount );
342 file.close();
346 double CSuffixCollection::GetDL_PhonologicalContent()
348 double affix_total_dl = 0;
349 CSuffix* pAffix;
350 for( int suffixno = 0; suffixno < (int)GetCount(); suffixno++ )
352 pAffix = GetAt(suffixno);
353 affix_total_dl += pAffix->ComputeDL( m_pMiniLex->GetNumberOfCharacterTypes() );
355 return affix_total_dl ;
359 double CSuffixCollection::CalculatePointersToMySuffixes(eMDL_STYLE style )//sum of pointers to members; this should be removed, and replaced by "GetLengthOfPointerToMe"
361 double Denominator = 0;
362 double ptr;
363 m_DLofPointersToMyMembers = 0;
364 m_CorpusCount = 0;
366 if (style == CorpusCount )
368 for (int suffixno = 0; suffixno < GetCount(); suffixno++)
370 m_CorpusCount += GetAt(suffixno)->GetCorpusCount();
372 for (int suffixno = 0; suffixno < GetCount(); suffixno++)
374 ptr = base2log ( m_CorpusCount / GetAt(suffixno)->GetCorpusCount() );
375 m_DLofPointersToMyMembers += ptr;
378 else if (style == GrammarCount )
380 for (int suffixno = 0; suffixno < GetCount(); suffixno++)
382 Denominator += GetAt(suffixno)->GetUseCount();
384 for (int suffixno = 0; suffixno < GetCount(); suffixno++)
386 ptr = base2log ( Denominator/ GetAt(suffixno)->GetUseCount() );
387 m_DLofPointersToMyMembers += ptr;
390 return m_DLofPointersToMyMembers;