1 // Implementation of CSuffixCollection methods
2 // Copyright © 2009 The University of Chicago
3 #include "SuffixCollection.h"
5 #include <Q3TextStream>
9 #include "MiniLexicon.h"
14 //////////////////////////////////////////////////////////////////////
15 // Construction/Destruction
16 //////////////////////////////////////////////////////////////////////
// Constructs a suffix collection from a mini-lexicon pointer (Lex).
// Visible behavior: when m_pMiniLex is non-null, m_pLexicon is set to
// m_pMiniLex->GetLexicon().
// NOTE(review): the initializer list and braces are not visible in this
// view; presumably Lex is forwarded to the TCollection base and stored in
// m_pMiniLex before the check below -- confirm.
18 CSuffixCollection::CSuffixCollection(CMiniLexicon
* Lex
)
20 // do the creation of TCollection with ptr to Lexicon and name of class
22 if( m_pMiniLex
) m_pLexicon
= m_pMiniLex
->GetLexicon();
// Default constructor.
// NOTE(review): only the signature is visible in this view; the initializer
// list and body cannot be described from here.
26 CSuffixCollection::CSuffixCollection( )
// Destructor. The visible loop runs i over [0, GetCount()) and calls
// m_pLexicon->RemoveSuffix(pSuffix) on each iteration.
// NOTE(review): the statement that assigns pSuffix (presumably GetAt(i)) and
// any null check on m_pLexicon are not visible in this view -- confirm both
// exist, since m_pLexicon is guarded elsewhere in this file (e.g. AddPointer).
33 CSuffixCollection::~CSuffixCollection()
38 for( int i
= 0; i
< GetCount(); i
++ )
42 m_pLexicon
->RemoveSuffix( pSuffix
);
// Fills pView with the contents of this suffix collection.
//
// Steps visible in this function:
//   1. Turn off root decoration and remove every existing column.
//   2. Add six columns (suffix text, description length, pointer length,
//      corpus count, use count, stems) and set their alignment.
//   3. Report progress through `status` while asking each suffix, in sort
//      order (GetAtSort), to add itself to the view.
//   4. Clear the progress and operation fields of `status` when done.
//
// pView  - list view to populate; dereferenced unconditionally.
// filter - passed through unchanged to CSuffix::ListDisplay.
// status - progress/status reporter; major_operation and progress are
//          written and then cleared.
// NOTE(review): the remainder of the parameter list (ExpressDeletees is used
// below), the braces, and the body of the m_SortValidFlag check are not
// visible in this view.
47 void CSuffixCollection::ListDisplay(
48 Q3ListView
* pView
, QMap
<QString
, QString
>* filter
,
49 linguistica::ui::status_user_agent
& status
,
52 pView
->setRootIsDecorated(false);
55 // Clean it out first.
56 while (pView
->columns() != 0)
57 pView
->removeColumn(0);
// Column headers. This is the suffix view, so the first column is labelled
// "Suffixes" (it previously read "Prefixes", which mislabelled the list).
60 pView
->addColumn("Suffixes");
61 pView
->addColumn("Descr. Length");
62 pView
->addColumn("Length of Ptr");
63 pView
->addColumn("Corpus Count");
64 pView
->addColumn("Use Count");
65 pView
->addColumn("Stems");
67 pView
->setColumnAlignment(0, Qt::AlignCenter
);
68 pView
->setColumnAlignment(1, Qt::AlignRight
);
69 pView
->setColumnAlignment(2, Qt::AlignRight
);
70 pView
->setColumnAlignment(3, Qt::AlignCenter
);
71 pView
->setColumnAlignment(4, Qt::AlignCenter
);
72 pView
->setColumnAlignment(5, Qt::AlignLeft
);
74 // Column three gets really wide, so
75 // limit the width to 100.
76 pView
->setColumnWidthMode(3, Q3ListView::Manual
);
77 pView
->setColumnWidth(3, 100);
79 status
.major_operation
= "Creating suffix list for display";
80 status
.progress
.clear();
82 if (m_SortValidFlag
== false)
// The progress denominator is the number of suffixes to be listed.
86 status
.progress
.set_denominator(GetCount());
87 for (int i
= 0; i
< (int) GetCount(); i
++) {
88 GetAtSort(i
)->ListDisplay(pView
, filter
, ExpressDeletees
,
89 m_pLexicon
->GetNumberOfCharacterTypes());
// Reset the status reporter now that the list has been built.
92 status
.progress
.clear();
93 status
.major_operation
.clear();
// Stream-style insertion of a suffix given as a CStringSurrogate.
// The visible statement forwards Suffix to AddToCollection() and stores the
// resulting CSuffix* in pSuffix.
// NOTE(review): the declaration of pSuffix and the return statement are not
// visible in this view; presumably pSuffix is returned -- confirm.
96 CSuffix
* CSuffixCollection::operator<< ( CStringSurrogate
& Suffix
)
100 pSuffix
= AddToCollection( Suffix
);
// Stream-style insertion of a suffix given as a pointer to a CParse.
// The pointer is dereferenced without a visible null check and the parse is
// forwarded to AddToCollection(); the result is stored in pSuffix.
// NOTE(review): the declaration of pSuffix and the return statement are not
// visible in this view; presumably pSuffix is returned -- confirm.
106 CSuffix
* CSuffixCollection::operator<< ( CParse
* Suffix
)
110 pSuffix
= AddToCollection (*Suffix
);
// Stream-style insertion of a suffix given as a CParse reference.
// The visible statement forwards Suffix to AddToCollection() and stores the
// resulting CSuffix* in pSuffix.
// NOTE(review): the declaration of pSuffix and the return statement are not
// visible in this view; presumably pSuffix is returned -- confirm.
116 CSuffix
* CSuffixCollection::operator<< ( CParse
& Suffix
)
120 pSuffix
= AddToCollection (Suffix
);
// Stream-style insertion of a suffix given as a QString.
// The string is wrapped in a CStringSurrogate (cssSuffix), which is then
// forwarded to AddToCollection(); the result is stored in pSuffix.
// NOTE(review): the declaration of pSuffix and the return statement are not
// visible in this view; presumably pSuffix is returned -- confirm.
125 CSuffix
* CSuffixCollection::operator<< ( QString Suffix
)
129 CStringSurrogate
cssSuffix( Suffix
);
131 pSuffix
= AddToCollection( cssSuffix
);
// Adds an existing CSuffix object to this collection.
// First delegates to TCollection<CSuffix>::AddPointer(); then, if a lexicon
// is attached (m_pLexicon non-null), also passes the suffix to
// m_pLexicon->InsertSuffix().
137 void CSuffixCollection::AddPointer( CSuffix
* pSuffix
)
139 TCollection
<CSuffix
>::AddPointer( pSuffix
);
141 if( m_pLexicon
) m_pLexicon
->InsertSuffix( pSuffix
);
// Adds the suffix described by a CParse to this collection.
// Delegates to TCollection<CSuffix>::AddToCollection() to obtain the CSuffix*,
// then, if a lexicon is attached (m_pLexicon non-null), passes that pointer
// to m_pLexicon->InsertSuffix().
// NOTE(review): the return statement is not visible in this view; presumably
// pSuffix is returned -- confirm.
145 CSuffix
* CSuffixCollection::AddToCollection( CParse
& Suffix
)
147 CSuffix
* pSuffix
= TCollection
<CSuffix
>::AddToCollection( Suffix
);
149 if( m_pLexicon
) m_pLexicon
->InsertSuffix( pSuffix
);
// Adds the suffix described by a CStringSurrogate to this collection.
// Delegates to TCollection<CSuffix>::AddToCollection() to obtain the CSuffix*,
// then, if a lexicon is attached (m_pLexicon non-null), passes that pointer
// to m_pLexicon->InsertSuffix().
// NOTE(review): the return statement is not visible in this view; presumably
// pSuffix is returned -- confirm.
155 CSuffix
* CSuffixCollection::AddToCollection( CStringSurrogate
& Suffix
)
157 CSuffix
* pSuffix
= TCollection
<CSuffix
>::AddToCollection( Suffix
);
159 if( m_pLexicon
) m_pLexicon
->InsertSuffix( pSuffix
);
// Empties the collection.
// For every index in [0, GetCount()) the suffix at that index is fetched
// with GetAt() and passed to m_pLexicon->RemoveSuffix(); afterwards
// TCollection<CSuffix>::Empty() is called.
// NOTE(review): no null check on m_pLexicon is visible around the loop in
// this view (other methods here guard it) -- confirm one exists.
165 void CSuffixCollection::Empty()
170 for( int suffixno
= 0; suffixno
< GetCount(); suffixno
++ )
172 pSuffix
= GetAt(suffixno
);
173 m_pLexicon
->RemoveSuffix( pSuffix
);
176 TCollection
<CSuffix
>::Empty();
// Removes all members from the collection.
// For every index in [0, GetCount()) the suffix at that index is fetched
// with GetAt() and passed to m_pLexicon->RemoveSuffix(); afterwards
// TCollection<CSuffix>::RemoveAll() is called.
// NOTE(review): no null check on m_pLexicon is visible around the loop in
// this view (other methods here guard it) -- confirm one exists.
180 void CSuffixCollection::RemoveAll()
186 for( int suffixno
= 0; suffixno
< GetCount(); suffixno
++ )
188 pSuffix
= GetAt(suffixno
);
189 m_pLexicon
->RemoveSuffix( pSuffix
);
192 TCollection
<CSuffix
>::RemoveAll();
// Removes pSuffix from this collection.
// If a lexicon is attached (m_pLexicon non-null) the suffix is first passed
// to m_pLexicon->RemoveSuffix(). Returns whatever
// TCollection<CSuffix>::Remove() returns for pSuffix.
196 bool CSuffixCollection::Remove( CSuffix
* pSuffix
)
198 if( m_pLexicon
) m_pLexicon
->RemoveSuffix( pSuffix
);
200 return TCollection
<CSuffix
>::Remove( pSuffix
);
// Removes the member pSuffix from this collection.
// If a lexicon is attached (m_pLexicon non-null) the suffix is first passed
// to m_pLexicon->RemoveSuffix(). Returns whatever
// TCollection<CSuffix>::RemoveMember() returns for pSuffix.
204 bool CSuffixCollection::RemoveMember( CSuffix
* pSuffix
)
206 if( m_pLexicon
) m_pLexicon
->RemoveSuffix( pSuffix
);
208 return TCollection
<CSuffix
>::RemoveMember( pSuffix
);
// Removes the member whose key is Suffix.
// The suffix object is looked up with Find1(Suffix)->Get_T_Pointer() and, if
// a lexicon is attached, passed to m_pLexicon->RemoveSuffix(). Returns
// whatever TCollection<CSuffix>::RemoveMember(Suffix) returns.
// NOTE(review): the result of Find1() is dereferenced with no visible null
// check; if Find1() can return null for an absent key this crashes before
// the base-class call gets a chance to report failure -- confirm.
212 bool CSuffixCollection::RemoveMember( CStringSurrogate
& Suffix
)
214 CSuffix
* pSuffix
= (CSuffix
*)Find1( Suffix
)->Get_T_Pointer();
216 if( m_pLexicon
) m_pLexicon
->RemoveSuffix( pSuffix
);
218 return TCollection
<CSuffix
>::RemoveMember( Suffix
);
// Removes the member whose key is Suffix; the flag b is passed through
// unchanged to TCollection<CSuffix>::RemoveMember(Suffix, b), whose result
// is returned.
// The suffix object is looked up with Find1(Suffix)->Get_T_Pointer() and, if
// a lexicon is attached, passed to m_pLexicon->RemoveSuffix() first.
// NOTE(review): the result of Find1() is dereferenced with no visible null
// check; if Find1() can return null for an absent key this crashes -- confirm.
222 bool CSuffixCollection::RemoveMember( CStringSurrogate
& Suffix
, bool b
)
224 CSuffix
* pSuffix
= (CSuffix
*)Find1( Suffix
)->Get_T_Pointer();
226 if( m_pLexicon
) m_pLexicon
->RemoveSuffix( pSuffix
);
228 return TCollection
<CSuffix
>::RemoveMember( Suffix
, b
);
// Returns m_TotalUseCount, calling CalculateTotalUseCount() first whenever
// the cached value is <= 0. A collection whose total stays at 0 is therefore
// recalculated on every call.
231 int CSuffixCollection::GetTotalUseCount()
232 { if (m_TotalUseCount
<= 0)
233 CalculateTotalUseCount();
234 return m_TotalUseCount
;
// Adds GetUseCount() of every suffix in the collection to m_TotalUseCount.
// NOTE(review): a reset of m_TotalUseCount before the loop is not visible in
// this view; if there is none, repeated calls keep accumulating -- confirm.
237 void CSuffixCollection::CalculateTotalUseCount()
240 for (int suffixno
= 0; suffixno
< GetCount(); suffixno
++)
241 { m_TotalUseCount
+= GetAt(suffixno
)->GetUseCount();
// Deletes the members flagged in m_DeletionArray.
// Does nothing when m_DeletionArray is NULL. Otherwise, for each index whose
// flag equals 1, the corresponding entry of m_PointerArray is passed to
// m_pLexicon->RemoveSuffix() (when a lexicon is attached); the actual
// deletion is then delegated to TCollection<CSuffix>::DeleteMarkedMembers().
245 void CSuffixCollection::DeleteMarkedMembers()
247 if ( m_DeletionArray
== NULL
) { return; }
// The count is captured once, before any removal takes place.
249 int count
= GetCount();
250 for (int suffixno
= 0; suffixno
< count
; suffixno
++)
252 if ( m_DeletionArray
[suffixno
] == 1 )
254 if( m_pLexicon
) m_pLexicon
->RemoveSuffix( m_PointerArray
[suffixno
] );
257 TCollection
<CSuffix
>::DeleteMarkedMembers();
// Writes the collection to the file named FileName.
// Nothing is written if the file cannot be opened for writing. On success
// the output consists of a two-line "# Suffix Count" header, the value of
// GetCount(), a blank line, a two-line column header, and then one entry per
// suffix in sort order (GetAtSort), each produced by CSuffix::OutputSuffix().
// NOTE(review): this stream is set to Q3TextStream::Unicode while
// ReadSuffixFile() in this file reads with Q3TextStream::Locale -- confirm
// the two are meant to round-trip.
261 void CSuffixCollection::OutputSuffixes( QString FileName
)
263 QFile
file( FileName
);
265 if( file
.open( QIODevice::WriteOnly
) )
267 Q3TextStream
outf( &file
);
268 outf
.setEncoding( Q3TextStream::Unicode
);
270 outf
<< "# Suffix Count" << endl
;
271 outf
<< "# ------------" << endl
;
272 outf
<< " " << GetCount() << endl
<< endl
;
274 outf
<< "# Suffix | Use Count | Corpus Count | Index | " << endl
;
275 outf
<< "# ------------------------------------------------- " << endl
;
278 for (int suffixno
= 0; suffixno
< (int)GetCount(); suffixno
++)
280 GetAtSort(suffixno
)->OutputSuffix( outf
);
// Reads suffixes from the file named FileName into this collection.
// Proceeds only if the file exists and opens read-only. The header is
// consumed line by line: two '#' lines, the remainder of the size line, a
// blank line, and two more '#' lines. Entries are then read until the stream
// ends or LineCount reaches size. Each key is passed through
// Filter(m_pLexicon->GetInFilter(), Key), inserted with operator<<, and its
// use count and corpus count are incremented by the values read.
// NOTE(review): the header checks are Q_ASSERTs, so a malformed file is not
// rejected in builds where Q_ASSERT is compiled out.
// NOTE(review): this stream is set to Q3TextStream::Locale while
// OutputSuffixes() in this file writes with Q3TextStream::Unicode -- confirm
// the two are meant to round-trip.
// NOTE(review): the statements that read size, Key, UseCount and CorpusCount
// are not visible in this view.
288 void CSuffixCollection::ReadSuffixFile (QString FileName
)
291 QFile
file(FileName
);
299 if( file
.exists() && file
.open( QIODevice::ReadOnly
) )
301 Q3TextStream
inf(&file
);
302 inf
.setEncoding ( Q3TextStream::Locale
);
304 Buffer
= inf
.readLine();
305 Q_ASSERT( Buffer
[0] == '#' );
307 Buffer
= inf
.readLine();
308 Q_ASSERT( Buffer
[0] == '#' );
312 Buffer
= inf
.readLine(); // end of size line
313 Q_ASSERT( Buffer
.length() == 0 );
315 Buffer
= inf
.readLine(); // blank line
316 Q_ASSERT( Buffer
.length() == 0 );
318 Buffer
= inf
.readLine();
319 Q_ASSERT( Buffer
[0] == '#' );
321 Buffer
= inf
.readLine();
322 Q_ASSERT( Buffer
[0] == '#' );
324 while( !inf
.atEnd() && LineCount
< size
)
333 // Filter all sequences that should be
334 // analyzed as one character
335 Key
= Filter( m_pLexicon
->GetInFilter(), Key
);
337 pSuffix
= *this << Key
;
338 pSuffix
->IncrementUseCount( UseCount
);
339 pSuffix
->IncrementCorpusCount( CorpusCount
);
// Returns the sum, over every suffix in the collection, of
// ComputeDL(m_pMiniLex->GetNumberOfCharacterTypes()).
// The sum is 0 for an empty collection.
// NOTE(review): m_pMiniLex is dereferenced inside the loop with no visible
// null check, although the constructor treats it as possibly null -- confirm.
346 double CSuffixCollection::GetDL_PhonologicalContent()
348 double affix_total_dl
= 0;
350 for( int suffixno
= 0; suffixno
< (int)GetCount(); suffixno
++ )
352 pAffix
= GetAt(suffixno
);
353 affix_total_dl
+= pAffix
->ComputeDL( m_pMiniLex
->GetNumberOfCharacterTypes() );
355 return affix_total_dl
;
// Computes m_DLofPointersToMyMembers, stores it, and returns it.
// The member is reset to 0 and then built as a sum of base2log(total / count)
// over all suffixes, where the counts depend on `style`:
//   - CorpusCount:  count is GetCorpusCount(); the total is m_CorpusCount
//                   after each suffix's corpus count has been added to it.
//   - GrammarCount: count is GetUseCount(); the total is the local
//                   Denominator, summed from 0 over all use counts.
// For any other style the result stays 0.
// NOTE(review): a suffix whose count is 0 makes the divisor 0 -- confirm
// callers cannot reach this with zero counts.
// NOTE(review): a reset of m_CorpusCount before the first loop is not visible
// in this view; if there is none, repeated calls keep accumulating -- confirm.
// NOTE(review): if m_CorpusCount and GetCorpusCount() are both integral the
// CorpusCount-branch quotient is truncated before base2log -- confirm types.
359 double CSuffixCollection::CalculatePointersToMySuffixes(eMDL_STYLE style
)//sum of pointers to members; this should be removed, and replaced by "GetLengthOfPointerToMe"
361 double Denominator
= 0;
363 m_DLofPointersToMyMembers
= 0;
366 if (style
== CorpusCount
)
// First pass: accumulate the total corpus count.
368 for (int suffixno
= 0; suffixno
< GetCount(); suffixno
++)
370 m_CorpusCount
+= GetAt(suffixno
)->GetCorpusCount();
// Second pass: add each suffix's pointer length.
372 for (int suffixno
= 0; suffixno
< GetCount(); suffixno
++)
374 ptr
= base2log ( m_CorpusCount
/ GetAt(suffixno
)->GetCorpusCount() );
375 m_DLofPointersToMyMembers
+= ptr
;
378 else if (style
== GrammarCount
)
// First pass: accumulate the total use count.
380 for (int suffixno
= 0; suffixno
< GetCount(); suffixno
++)
382 Denominator
+= GetAt(suffixno
)->GetUseCount();
// Second pass: add each suffix's pointer length.
384 for (int suffixno
= 0; suffixno
< GetCount(); suffixno
++)
386 ptr
= base2log ( Denominator
/ GetAt(suffixno
)->GetUseCount() );
387 m_DLofPointersToMyMembers
+= ptr
;
390 return m_DLofPointersToMyMembers
;