CMiniLexicon::FindMajorSignatures(): use log file routines
[linguistica.git] / PrefixCollection.cpp
blobc9a6be8f0e6b4839c02bba6a007bbadd1c3f4d7b
1 // Implementation of CPrefixCollection methods
2 // Copyright © 2009 The University of Chicago
3 #include "PrefixCollection.h"
5 #include <Q3TextStream>
6 #include <QIODevice>
7 #include <QFile>
8 #include "ui/Status.h"
9 #include "Lexicon.h"
10 #include "MiniLexicon.h"
11 #include "Prefix.h"
13 CPrefixCollection::CPrefixCollection (CMiniLexicon* Lex)
15 m_pMiniLex = Lex;
16 if( m_pMiniLex ) m_pLexicon = m_pMiniLex->GetLexicon();
21 CPrefixCollection::CPrefixCollection ( )
23 m_pLexicon = NULL;
24 m_pMiniLex = NULL;
28 CPrefixCollection::~CPrefixCollection()
30 if( m_pLexicon )
32 CPrefix* pPrefix;
34 for( int i = 0; i < GetCount(); i++ )
36 pPrefix = GetAt(i);
38 m_pLexicon->RemovePrefix( pPrefix );
47 CPrefix* CPrefixCollection::operator<< ( CStringSurrogate Prefix )
49 CPrefix* pPrefix;
51 pPrefix = AddToCollection( Prefix );
53 return pPrefix;
57 CPrefix* CPrefixCollection::operator<< ( CParse* Prefix )
59 CPrefix* pPrefix;
61 pPrefix = AddToCollection (*Prefix);
63 return pPrefix;
67 CPrefix* CPrefixCollection::operator<< ( QString Prefix )
69 CPrefix* pPrefix;
71 CStringSurrogate ssPrefix( Prefix );
72 pPrefix = AddToCollection( ssPrefix );
74 return pPrefix;
78 void CPrefixCollection::AddPointer( CPrefix* pPrefix )
80 TCollection<CPrefix>::AddPointer( pPrefix );
82 if( m_pLexicon ) m_pLexicon->InsertPrefix( pPrefix );
86 CPrefix* CPrefixCollection::AddToCollection( CParse& Prefix )
88 CPrefix* pPrefix = TCollection<CPrefix>::AddToCollection( Prefix );
90 if( m_pLexicon ) m_pLexicon->InsertPrefix( pPrefix );
92 return pPrefix;
96 CPrefix* CPrefixCollection::AddToCollection( CStringSurrogate& Prefix )
98 CPrefix* pPrefix = TCollection<CPrefix>::AddToCollection( Prefix );
100 if( m_pLexicon ) m_pLexicon->InsertPrefix( pPrefix );
102 return pPrefix;
106 void CPrefixCollection::Empty()
108 if( m_pLexicon )
110 CPrefix* pPrefix;
112 for( int i = 0; i < GetCount(); i++ )
114 pPrefix = GetAt(i);
116 m_pLexicon->RemovePrefix( pPrefix );
120 TCollection<CPrefix>::Empty();
124 void CPrefixCollection::RemoveAll()
126 if( m_pLexicon )
128 CPrefix* pPrefix;
130 for( int i = 0; i < GetCount(); i++ )
132 pPrefix = GetAt(i);
134 m_pLexicon->RemovePrefix( pPrefix );
138 TCollection<CPrefix>::RemoveAll();
142 bool CPrefixCollection::Remove( CPrefix* pPrefix )
144 if( m_pLexicon ) m_pLexicon->RemovePrefix( pPrefix );
146 return TCollection<CPrefix>::Remove( pPrefix );
150 bool CPrefixCollection::RemoveMember( CPrefix* pPrefix )
152 if( m_pLexicon ) m_pLexicon->RemovePrefix( pPrefix );
154 return TCollection<CPrefix>::RemoveMember( pPrefix );
158 bool CPrefixCollection::RemoveMember( CStringSurrogate& Prefix )
160 CPrefix* pPrefix = (CPrefix*)Find1( Prefix )->Get_T_Pointer();
162 if( m_pLexicon ) m_pLexicon->RemovePrefix( pPrefix );
164 return TCollection<CPrefix>::RemoveMember( Prefix );
168 bool CPrefixCollection::RemoveMember( CStringSurrogate& Prefix, bool b )
170 CPrefix* pPrefix = (CPrefix*)Find1( Prefix )->Get_T_Pointer();
172 if( m_pLexicon ) m_pLexicon->RemovePrefix( pPrefix );
174 return TCollection<CPrefix>::RemoveMember( Prefix, b );
178 void CPrefixCollection::DeleteMarkedMembers()
180 if ( m_DeletionArray == NULL ) { return; }
182 int count = GetCount();
183 for (int i = 0; i < count; i++)
185 if ( m_DeletionArray[i] == 1 )
187 if( m_pLexicon ) m_pLexicon->RemovePrefix( m_PointerArray[i] );
191 TCollection<CPrefix>::DeleteMarkedMembers();
194 void CPrefixCollection::ListDisplay(
195 Q3ListView* pView, QMap<QString, QString>* filter,
196 linguistica::ui::status_user_agent& status)
198 pView->setRootIsDecorated(false);
199 pView->setSorting(1);
201 // Clean it out first.
202 while (pView->columns() != 0)
203 pView->removeColumn(0);
205 // Add Column headers
206 pView->addColumn("Prefixes");
207 pView->addColumn("Descr. Length");
208 // XXX. suffixes has "Length Of Ptr" column
209 pView->addColumn("Corpus Count");
210 pView->addColumn("Use Count");
211 pView->addColumn("Stems");
213 // XXX. suffixes sets column alignments and widths
215 status.major_operation = "Creating prefix list for display";
216 status.progress.clear();
218 if (m_SortValidFlag == false)
219 Sort(COUNT);
221 // Display each item
222 status.progress.set_denominator(GetCount());
223 for (int i = 0; i < (int) GetCount(); i++) {
224 GetAtSort(i)->ListDisplay(pView, filter, m_pLexicon->GetNumberOfCharacterTypes());
225 status.progress = i;
227 status.progress.clear();
228 status.major_operation.clear();
231 void CPrefixCollection::OutputPrefixes( QString FileName )
233 Q_ASSERT(!FileName.isEmpty());
234 QFile file( FileName );
236 if( file.open( QIODevice::WriteOnly ) )
238 Q3TextStream outf( &file );
239 outf.setEncoding( Q3TextStream::Unicode );
241 outf << "# Prefix Count" << endl;
242 outf << "# ------------" << endl;
243 outf << " " << GetCount() << endl << endl;
245 outf << "# Prefix | Use Count | Corpus Count | Index | " << endl;
246 outf << "# ------------------------------------------------- " << endl;
248 Sort( CORPUSCOUNT );
249 for (int i = 0; i < (int)GetCount(); i++)
251 GetAtSort(i)->OutputPrefix( outf );
254 file.close();
259 void CPrefixCollection::ReadPrefixFile (QString FileName)
261 Q_ASSERT(!FileName.isEmpty());
262 CPrefix* pPrefix;
263 QFile file(FileName);
264 QString Buffer, Key;
265 int Index,
266 size,
267 UseCount,
268 CorpusCount,
269 LineCount = 0;
271 if( file.exists() && file.open( QIODevice::ReadOnly ) )
273 Q3TextStream inf(&file);
274 inf.setEncoding ( Q3TextStream::Locale );
276 Buffer = inf.readLine();
277 Q_ASSERT( Buffer[0] == '#' );
279 Buffer = inf.readLine();
280 Q_ASSERT( Buffer[0] == '#' );
282 inf >> size;
284 Buffer = inf.readLine(); // end of size line
285 Q_ASSERT( Buffer.length() == 0 );
287 Buffer = inf.readLine(); // blank line
288 Q_ASSERT( Buffer.length() == 0 );
290 Buffer = inf.readLine();
291 Q_ASSERT( Buffer[0] == '#' );
293 Buffer = inf.readLine();
294 Q_ASSERT( Buffer[0] == '#' );
296 while( !inf.atEnd() && LineCount < size )
298 LineCount++;
300 inf >> Key;
301 inf >> UseCount;
302 inf >> CorpusCount;
303 inf >> Index;
305 // Filter all sequences that should be
306 // analyzed as one character
307 Key = Filter( m_pLexicon->GetInFilter(), Key );
309 pPrefix = *this << CStringSurrogate( Key );
310 pPrefix->IncrementUseCount( UseCount );
311 pPrefix->IncrementCorpusCount( CorpusCount );
314 file.close();
319 double CPrefixCollection::GetDL_PhonologicalContent()
321 double affix_total_dl = 0;
322 int i;
323 CPrefix* pAffix;
324 for( i = 0; i < (int)GetCount(); i++ )
326 pAffix = GetAt(i);
327 affix_total_dl += pAffix->ComputeDL( m_pMiniLex->GetNumberOfCharacterTypes() );
329 return affix_total_dl ;