1 // Implementation of CSignatureCollection methods
2 // Copyright © 2009 The University of Chicago
3 #include "SignatureCollection.h"
12 #include "ui/Status.h"
13 #include "MiniLexicon.h"
15 #include "Allomorphy.h"
16 #include "Signature.h"
20 #include "PrefixCollection.h"
21 #include "SuffixCollection.h"
22 #include "StemCollection.h"
25 // <<-------------------------------------------------------------------------------------------------------->>
// Default constructor.
// Names the collection "Signatures" and resets the cached description
// length of pointers to members to zero.
26 CSignatureCollection::CSignatureCollection()
30 m_MemberName
= "Signatures";
33 m_DLofPointersToMyMembers
=0;
// Constructs a collection attached to a mini-lexicon.
// Caches the owning lexicon when m_pMiniLex is set and takes the signature
// type (affix location) from the mini-lexicon.
// NOTE(review): the GetAffixLocation() call below dereferences m_pMiniLex
// without the null check used for GetLexicon() -- confirm Lex is never null.
35 CSignatureCollection::CSignatureCollection( CMiniLexicon
* Lex
)
38 if( m_pMiniLex
) m_pLexicon
= m_pMiniLex
->GetLexicon();
39 m_MemberName
= "Signatures";
40 m_SignatureType
= m_pMiniLex
->GetAffixLocation();
43 m_DLofPointersToMyMembers
=0;
// Constructs a collection attached to a mini-lexicon and a suffix collection.
// Stores `suffixes` in MySuffixes and uses the caller-supplied `AfLoc` as the
// signature type; the lexicon pointer is cached only when m_pMiniLex is set.
46 CSignatureCollection::CSignatureCollection (CMiniLexicon
* Lex
, CSuffixCollection
* suffixes
, eAffixLocation AfLoc
)
49 if( m_pMiniLex
) m_pLexicon
= m_pMiniLex
->GetLexicon();
50 m_MemberName
= "Signatures";
51 MySuffixes
= suffixes
;
52 m_SignatureType
= AfLoc
;
54 m_DLofPointersToMyMembers
=0;
// Constructs a collection attached to a mini-lexicon and a prefix collection.
// Stores `Prefixes` in MyPrefixes and uses the caller-supplied `AfLoc` as the
// signature type; the lexicon pointer is cached only when m_pMiniLex is set.
57 CSignatureCollection::CSignatureCollection (CMiniLexicon
* Lex
, CPrefixCollection
* Prefixes
, eAffixLocation AfLoc
)
60 if( m_pMiniLex
) m_pLexicon
= m_pMiniLex
->GetLexicon();
61 m_MemberName
= "Signatures";
62 MyPrefixes
= Prefixes
;
63 m_SignatureType
= AfLoc
;
65 m_DLofPointersToMyMembers
=0;
// Constructs a collection with only a signature type (no lexicon pointers
// are assigned here).
68 CSignatureCollection::CSignatureCollection (eAffixLocation SigType
)
72 m_SignatureType
= SigType
;
73 m_MemberName
= "Signatures";
76 m_DLofPointersToMyMembers
=0;
79 // <<-------------------------------------------------------------------------------------------------------->>
// Destructor.
// Walks every member and, depending on m_SignatureType, unregisters it from
// the lexicon's prefix-signature or suffix-signature index.
// NOTE(review): m_pLexicon is dereferenced below; confirm it is guarded
// against null for collections built without a mini-lexicon.
80 CSignatureCollection::~CSignatureCollection()
86 for( int signo
= 0; signo
< GetCount(); signo
++ )
91 switch( m_SignatureType
)
95 m_pLexicon
->RemovePrefixSig( pSig
);
100 m_pLexicon
->RemoveSuffixSig( pSig
);
107 //==============================================================================================//
112 //==============================================================================================//
// Lookup by string.
// Returns NULL for an empty string; otherwise searches the trie with Find1()
// using the string's UTF-16 data and returns the CSignature stored on the
// node found, or NULL when there is none.
114 CSignature
* CSignatureCollection::operator^= (QString Signature
) //" Lookup"
116 if( Signature
.length() < 1 ) return NULL
;
118 CNode
*pNode
= Find1 ( CStringSurrogate( Signature
.unicode(),0,Signature
.length() ) );
121 return (CSignature
*) pNode
->Get_T_Pointer();
123 else { return NULL
; }
// Lookup by parse (reference overload).
// The key is the parse rendered with '.' between pieces. Unlike the pointer
// overload, no Alphabetize() call is visible here, so the parse is looked up
// in whatever order its pieces already have.
126 CSignature
* CSignatureCollection::operator^= (CParse
& Parse
) //" Lookup"
128 // Return NULL if parse is empty.
129 if( Parse
.GetKeyLength() < 1 || Parse
.Size() < 1 ) return NULL
;
133 QString display
= Parse
.Display('.');
134 CNode
*pNode
= Find1 ( CStringSurrogate( display
.unicode(),0,display
.length() ) );
137 return (CSignature
*) pNode
->Get_T_Pointer();
139 else { return NULL
; }
142 // <<-------------------------------------------------------------------------------------------------------->>
// Lookup by parse (pointer overload).
// Side effect: alphabetizes *pParse in place before building the '.'-joined
// lookup key.
// NOTE(review): pParse is dereferenced without a null check.
143 CSignature
* CSignatureCollection::operator^= (CParse
* pParse
) //" Lookup"
145 // Return NULL if parse is empty.
146 if( pParse
->GetKeyLength() < 1 || pParse
->Size() < 1 ) return NULL
;
148 pParse
->Alphabetize();
150 QString display
= pParse
->Display('.');
151 CNode
*pNode
= Find1 ( CStringSurrogate( display
.unicode(),0,display
.length() ) );
154 return (CSignature
*) pNode
->Get_T_Pointer();
156 else { return NULL
; }
158 // <<-------------------------------------------------------------------------------------------------------->>
// Lookup by string surrogate.
// The surrogate is passed to Find1() unchanged; the result is the CSignature
// stored on the node found, or NULL.
159 CSignature
* CSignatureCollection::operator^= (CStringSurrogate
& Signature
) //" Lookup"
161 // Return NULL if Signature is empty.
162 if( Signature
.GetLength() < 1 ) return NULL
;
164 CNode
*pNode
= Find1 ( Signature
);
167 return (CSignature
*) pNode
->Get_T_Pointer();
169 else { return NULL
; }
173 // <<-------------------------------------------------------------------------------------------------------->>
// Insert-or-find a signature described by a parse.
// Steps visible in this function:
//   1. *pParse is alphabetized in place and rendered with '.' separators;
//      that rendering is the trie key passed to Insert().
//   2. A new CSignature(pParse, m_pMiniLex) is created and stored on the
//      terminal node, and each piece of the parse (1-based) is looked up in
//      the mini-lexicon's prefix or suffix collection and appended to it.
//      Presumably this happens only when Insert() reports a new entry through
//      Result -- TODO confirm.
//   3. Otherwise the signature already stored on the terminal node is reused.
//   4. The signature is registered with the lexicon's prefix/suffix signature
//      index, corpus counts are incremented on both the collection and the
//      signature, the sort is invalidated, and the signature's back pointers
//      and affix location are set.
// NOTE(review): the affix lookups can yield NULL, which is then passed to
// AppendPrefixPtr/AppendSuffixPtr -- confirm those tolerate NULL.
174 CSignature
* CSignatureCollection::operator<< (CParse
* pParse
)
182 pParse
->Alphabetize(); // Jan 2009 JG
184 QString display
= pParse
->Display('.');
185 CParse SpelledOutSig
= CStringSurrogate(display
.unicode(),0,display
.length());
188 pTerminal
= Insert (SpelledOutSig
.GetKey(), &Result
);
191 pSig
= new CSignature( pParse
, m_pMiniLex
);
192 pTerminal
->SetPointer( pSig
);
195 switch (m_SignatureType
)
199 for (int affixno
= 1; affixno
<= pParse
->Size(); affixno
++)
201 pPrefix
= *m_pMiniLex
->GetPrefixes() ^= pParse
->GetPiece(affixno
);
204 pSig
->AppendPrefixPtr( pPrefix
);
211 for (int affixno
= 1; affixno
<= pParse
->Size(); affixno
++)
213 pSuffix
= *m_pMiniLex
->GetSuffixes() ^= pParse
->GetPiece(affixno
);
216 pSig
->AppendSuffixPtr( pSuffix
);
224 pSig
=(CSignature
*) pTerminal
->Get_T_Pointer();
229 switch( m_SignatureType
)
233 m_pLexicon
->InsertPrefixSig( pSig
);
238 m_pLexicon
->InsertSuffixSig( pSig
);
242 IncrementCorpusCount(1);
243 pSig
->IncrementCorpusCount(1);
245 m_SortValidFlag
= FALSE
;
246 m_HashHasChangedFlag
= TRUE
;
248 pSig
->SetLexicon( m_pMiniLex
);
249 pSig
->SetSignatureCollection ( this );
252 pSig
->SetAffixLocation ( m_SignatureType
);
// Insert-or-find a signature by copying an existing one.
// The trie key is Sig's '.'-separated display. A copy (new CSignature(*Sig))
// is stored on the terminal node and its affix pointers are filled in from
// the mini-lexicon's prefix or suffix collection; otherwise the signature
// already on the node is reused. The tail mirrors the CParse* overload:
// register with the lexicon, bump corpus counts, invalidate the sort, and
// set back pointers.
// NOTE(review): Sig->Display('.') is evaluated twice below, producing two
// separate temporaries (one for the data pointer, one for the length). They
// are equal only as long as Display() is deterministic; caching the result
// in a local QString, as the CParse* overload does, would be safer.
259 CSignature
* CSignatureCollection::operator<< (CSignature
* Sig
)
261 CSignature
* pSig
= NULL
;
267 CParse SpelledOutSig
= CStringSurrogate(Sig
->Display('.').unicode(),0,Sig
->Display('.').length());
270 pTerminal
= Insert (SpelledOutSig
.GetKey(), &Result
);// CAUSED PROBLEM!!!!!!
274 pSig
= new CSignature(*Sig
);
275 pTerminal
->SetPointer (pSig
);
278 switch (m_SignatureType
)
283 for ( affixno
= 1; affixno
<= Sig
->Size(); affixno
++)
285 CPrefix
* pPrefix
= *m_pMiniLex
->GetPrefixes() ^= Sig
->GetPiece(affixno
);
287 pSig
->AppendPrefixPtr( pPrefix
);
296 for ( affixno
= 1; affixno
<= Sig
->Size(); affixno
++)
298 CSuffix
* pSuffix
= *m_pMiniLex
->GetSuffixes() ^= Sig
->GetPiece(affixno
);
300 pSig
->AppendSuffixPtr( pSuffix
);
309 pSig
=(CSignature
*) pTerminal
->Get_T_Pointer();
315 switch( m_SignatureType
) {
318 m_pLexicon
->InsertPrefixSig( pSig
);
323 m_pLexicon
->InsertSuffixSig( pSig
);
326 IncrementCorpusCount(1);
327 pSig
->IncrementCorpusCount(1);
328 m_SortValidFlag
= FALSE
;
329 m_HashHasChangedFlag
= TRUE
;
330 pSig
->SetLexicon( m_pMiniLex
);
331 pSig
->SetSignatureCollection ( this );
333 pSig
->SetAffixLocation ( m_SignatureType
);
338 //==============================================================================================//
343 //==============================================================================================//
// Setter: stores the given prefix collection pointer in MyPrefixes.
344 void CSignatureCollection::SetMyPrefixes(CPrefixCollection
* pAC
){ MyPrefixes
= pAC
;}
// Setter: stores the given suffix collection pointer in MySuffixes.
345 void CSignatureCollection::SetMySuffixes(CSuffixCollection
* pAC
){ MySuffixes
= pAC
;}
// Assigns a "mentor" to each signature for hierarchical display.
// For every signature with at least two affixes (in sort order), the
// signatures that sort before it are scanned; one that Contains() it becomes
// its mentor, otherwise the mentor is set to NULL. The scan is quadratic in
// the number of signatures. Finishes by recording SIG_MENTORS as the sort
// style and marking the sort valid.
348 void CSignatureCollection::FindDisplayOrdering()
350 int Size
= GetCount();
357 for (int signo
= 0; signo
< Size
; signo
++)
359 pSig
= GetAtSort(signo
); // We're looking for pSig's mentor, if it has one
360 if (pSig
->Size() < 2) continue;
361 for (int signo2
= 0; signo2
< signo
; signo2
++)
363 qSig
= GetAtSort(signo2
);
364 if ( qSig
->Contains(pSig
) )
366 pSig
->SetMentor (qSig
);
369 else pSig
->SetMentor( NULL
);
373 m_SortStyle
= SIG_MENTORS
;
374 m_SortValidFlag
= TRUE
;
// Populates a Q3ListView with one row per signature.
// - Clears all existing columns, then adds seven: Signatures, Exemplar,
//   Descr. Length, Corpus Count, Stem Count, Source, Robustness; the view is
//   sorted on column 6 (Robustness).
// - Calls FindDisplayOrdering() so that mentor relations are current.
// - Iterates the members from last to first in sort order, testing for a
//   mentor and comparing stem and affix counts against the display minimums
//   (presumably to skip such rows -- TODO confirm).
// - Signatures on an item's mentor list are added as child rows of that item.
// Progress is reported through the lexicon's status display.
// NOTE(review): m_pLexicon and m_pMiniLex are dereferenced unconditionally.
377 void CSignatureCollection::ListDisplay(
378 Q3ListView
* pView
, QMap
<QString
, QString
>* filter
)
380 CLexicon
& lex
= *m_pLexicon
;
381 linguistica::ui::status_user_agent
& status
= lex
.status_display();
383 // XXX. make these adjustable by user.
384 int MinimumNumberOfStemsForDisplay
= 2;
385 int MinimumNumberOfAffixesForDisplay
= 2;
388 MinimumNumberOfStemsForDisplay
= 1;
390 pView
->setSorting(6);
392 // Remove all previous columns
393 while (pView
->columns() != 0)
394 pView
->removeColumn(0);
397 // Add Column headers
398 pView
->addColumn("Signatures");
399 pView
->addColumn("Exemplar");
400 pView
->addColumn("Descr. Length", 100);
401 pView
->addColumn("Corpus Count", 100);
402 pView
->addColumn("Stem Count", 100);
403 pView
->addColumn("Source");
404 pView
->addColumn("Robustness");
406 pView
->setColumnAlignment(0, Qt::AlignLeft
);
407 pView
->setColumnAlignment(1, Qt::AlignCenter
);
408 pView
->setColumnAlignment(2, Qt::AlignRight
);
409 pView
->setColumnAlignment(3, Qt::AlignCenter
);
410 pView
->setColumnAlignment(4, Qt::AlignCenter
);
411 pView
->setColumnAlignment(5, Qt::AlignCenter
);
412 pView
->setColumnAlignment(6, Qt::AlignCenter
);
414 status
.major_operation
= "Creating signature list for display";
415 status
.progress
.clear();
416 FindDisplayOrdering();
// NOTE(review): the denominator is GetCount()-1, which is 0 for a single
// signature and -1 for an empty collection -- confirm the progress widget
// accepts those values.
417 status
.progress
.set_denominator(GetCount()-1);
418 for (int signo
= GetCount()-1; signo
>=0 ; signo
--) {
419 CSignature
* pSig
= GetAtSort(signo
);
420 status
.progress
= GetCount()-1 - signo
;
421 if (pSig
->GetMentor())
423 if (pSig
->GetNumberOfStems() < MinimumNumberOfStemsForDisplay
)
425 if (pSig
->Size() < MinimumNumberOfAffixesForDisplay
)
428 CSignatureListViewItem
* item
= new CSignatureListViewItem(
429 pView
, pSig
->Express(), m_pMiniLex
->GetIndex(), pSig
, filter
);
430 if (pSig
->GetMentorList()) {
431 for (int signo2
= 0; signo2
< pSig
->GetMentorList()->size(); signo2
++) {
432 CSignature
* qSig
= pSig
->GetMentorList()->at(signo2
);
433 if (qSig
->GetNumberOfStems() < MinimumNumberOfStemsForDisplay
)
// The new item is parented to `item`; the pointer itself is discarded.
435 static_cast<void>(new CSignatureListViewItem(
436 item
, qSig
->Display(), m_pMiniLex
->GetIndex(), qSig
, filter
));
441 status
.progress
.clear();
442 status
.major_operation
.clear();
// Populates a Q3ListView with borrowed-signature rows.
// Clears all existing columns, adds "Signatures" and "Source", then delegates
// to each member's BorrowedSigsDisplay() in storage order, reporting progress
// through the lexicon's status display.
445 void CSignatureCollection::BorrowedSigsDisplay(
446 Q3ListView
* pView
, QMap
<QString
, QString
>* filter
)
448 CLexicon
& lex
= *m_pLexicon
;
449 linguistica::ui::status_user_agent
& status
= lex
.status_display();
451 // Remove all previous columns
452 while (pView
->columns() != 0)
453 pView
->removeColumn(0);
455 // Add Column headers
456 pView
->addColumn("Signatures");
457 pView
->addColumn("Source");
460 status
.major_operation
= "Creating signature list for display";
461 status
.progress
.clear();
462 status
.progress
.set_denominator(GetCount());
463 for (int signo
= 0; signo
< (int)GetCount(); signo
++) {
464 GetAt(signo
)->BorrowedSigsDisplay(pView
, filter
);
465 status
.progress
= signo
;
467 status
.progress
.clear();
468 status
.major_operation
.clear();
471 ////////////////////////////////////////////////////
472 ////////////////////////////////////////////////////
478 ////////////////////////////////////////////////////
479 ////////////////////////////////////////////////////
// Writes all signatures to a text file.
// Opens FileName for writing, emits a short header containing the signature
// count (Unicode encoding), then asks each signature, in sort order, to write
// itself. If the file cannot be opened, nothing visible here reports the
// failure.
482 void CSignatureCollection::OutputSignatures( QString FileName
)
484 QFile
file( FileName
);
486 if( file
.open( QIODevice::WriteOnly
) )
488 QTextStream
outf( &file
);
489 outf
.setEncoding( QTextStream::Unicode
);
491 outf
<< "# Signature Count" << endl
;
492 outf
<< "# ---------------" << endl
;
493 outf
<< " " << GetCount() << " signatures" << endl
<< endl
;
497 for (int i
= 0; i
< (int)GetCount(); i
++)
499 GetAtSort(i
)->OutputSignature( outf
);
// Writes the signatures as an xfst script.
// Emits a '#'-comment header with the file name and signature count, one
// OutputSignatureXfst() record per signature (numbered from 1, in sort
// order), and finally the "union net" and "print words" commands.
// NOTE(review): uses the Qt3-compat flag IO_WriteOnly, whereas
// OutputSignatures uses QIODevice::WriteOnly.
506 void CSignatureCollection::OutputXfst( QString FileName
)
508 QFile
file( FileName
);
510 if( file
.open( IO_WriteOnly
) )
512 QTextStream
outf( &file
); //Should be ascii file, not unicode
514 outf
<< "# " << endl
;
515 outf
<< "# File: " << FileName
<< endl
;
516 outf
<< "# Signature count: " << GetCount() << endl
;
517 outf
<< "# " << endl
;
518 // Sort( CORPUSCOUNT );
520 for (int i
= 0; i
< (int)GetCount(); i
++)
522 GetAtSort(i
)->OutputSignatureXfst( outf
, i
+1 );
526 outf
<< "union net" << endl
<< endl
;
527 outf
<< "print words" << endl
<< endl
;
535 ////////////////////////////////////////////////////
536 ////////////////////////////////////////////////////
542 ////////////////////////////////////////////////////
543 ////////////////////////////////////////////////////
545 /*void CSignatureCollection::LimitedOutput (QString Filename)
547 QFile file( Filename );
549 if( file.open( IO_WriteOnly ) )
551 QTextStream outf( &file );
552 int TotalWordCount = 0;
557 int NumEntries = GetCount();
559 outf.setf(2); // Set fields left justified
560 outf << "# Index Signature StemCount AffixCount log(StemCount)*log(AffixCount)" << endl << endl;
563 for( i = 0; i < NumEntries; i++ )
567 outf << ++counter << " ";
568 outf << pSig -> Display( '.', m_pLexicon->GetOutFilter() ) << " ";
569 outf << pSig -> GetStems().Size() << " ";
570 outf << pSig->GetNumberOfAffixes()
571 << " " << log( pSig->GetStems().Size() ) * log ( pSig->GetNumberOfAffixes() )
574 TotalWordCount += pSig->GetStemPtrList()->count() * pSig->GetNumberOfAffixes();
577 outf << endl << "Total number of words covered: " << TotalWordCount;
// Exception thrown by the signature-file parsing helpers when the input does
// not match the expected format. Carries no payload.
585 struct cannot_parse_input
: virtual std::exception
{ };
587 /// skip blank lines and comments
// Loops until `buf` is non-empty and does not begin with '#'.
// NOTE(review): no end-of-stream test is visible in the loop condition;
// confirm the loop terminates when the input ends with blank or comment lines.
588 QString
get_line(QTextStream
& in
)
593 } while (buf
.isEmpty() || buf
[0] == '#');
597 /// swallow end of line, throwing an exception if that involves
/// discarding anything: reads the rest of the current line from `in` and
/// throws cannot_parse_input when that remainder is not empty.
599 void check_end_of_line(QTextStream
& in
)
601 QString remainder
= in
.readLine();
602 if (!remainder
.isEmpty())
603 throw cannot_parse_input();
// Converts `s` to an int with QString::toInt(), which reports success through
// `ok`; cannot_parse_input is thrown on the failure path.
606 int string_to_int(QString s
)
609 int result
= s
.toInt(&ok
);
611 throw cannot_parse_input();
// First pass over a signature file: creates the signatures, not their stems.
// Format handled here (blank lines and '#' comments are skipped by get_line):
//   - first line: number of signatures;
//   - per signature, a header line "signature stem-count corpus-count",
//     then a remark line in which '_' stands for a space,
//     then stem-count stems, which are only skipped in this pass.
// Each signature is created with the given SigType, filled in, inserted in
// the trie under its filtered text, and recorded in m_PointerArray.
// Parse failures are caught at the end and reported on std::cerr only.
// NOTE(review): m_PointerArray is freed and re-allocated with
// `signature_count` entries taken straight from the file; a negative or
// huge value is not rejected. The write at index GetCount()-1 also assumes
// the collection holds no more than signature_count entries -- confirm.
// NOTE(review): std::auto_ptr is deprecated in C++11 and removed in C++17.
616 void CSignatureCollection::ReadSignatureFile(QString Filename
,
617 enum eAffixLocation SigType
) { try
619 QFile
file(Filename
);
620 if (!file
.open(QIODevice::ReadOnly
))
623 QTextStream
inf(&file
);
625 const int signature_count
= string_to_int(
626 get_line(inf
).trimmed());
628 delete[] m_PointerArray
;
629 m_PointerArray
= new CSignature
*[signature_count
];
631 for (int count
= 1; count
<= signature_count
; ++count
) {
632 QString sig_header
= get_line(inf
).trimmed();
635 // SP+ signature SP+ stem count SP+ corpus count SP+
636 QTextStream
line_in(&sig_header
, QIODevice::ReadOnly
);
637 QString sig_graphemes
, stem_count_text
,
639 line_in
>> sig_graphemes
>>
640 stem_count_text
>> corpus_count_text
;
641 check_end_of_line(line_in
);
643 const QString sig_text
= Filter(m_pLexicon
->GetInFilter(),
645 const int stem_count
= string_to_int(stem_count_text
);
646 const int corpus_count
= string_to_int(corpus_count_text
);
648 // line 2: signature origin
649 QString remark
= get_line(inf
).trimmed();
650 remark
.replace(QChar('_'), QChar(' '));
652 std::auto_ptr
<CSignature
> sig(new CSignature(
653 SigType
, m_pMiniLex
));
654 sig
->IngestSignature(sig_text
);
655 sig
->SetCorpusCount(corpus_count
);
656 sig
->SetRemark(remark
);
657 sig
->SetSignatureCollection(this);
659 for (int i
= 0; i
< stem_count
; ++i
) {
662 // We haven’t read the Stems.txt file
663 // yet, so just swallow each stem here.
664 // The stems will be read from Signatures.txt
665 // when it is read again in
666 // ReadSignatureFileBis.
669 CNode
* terminal
= Insert(sig_text
);
// Ownership passes from the auto_ptr to the trie node here; the raw pointer
// stored in m_PointerArray just above is a non-owning alias.
670 m_PointerArray
[GetCount() - 1] = sig
.get();
671 terminal
->SetPointer(sig
.release());
673 } catch (cannot_parse_input
) {
674 // XXX. report to user
675 std::cerr
<< "Signature.txt: cannot parse" << std::endl
;
// Second pass over the signature file: attaches stems to signatures.
// Re-reads the same headers as ReadSignatureFile(), finds each signature
// already in this collection through operator^=, looks up every listed stem
// in the mini-lexicon's stem collection and appends it to the signature.
// Afterwards every stem of the signature is added to each of the signature's
// affixes, using prefixes when is_initial(affix location) holds and suffixes
// otherwise. An affix that cannot be found raises cannot_parse_input, which
// is caught at the end and reported on std::cerr only.
// NOTE(review): `sig` comes from a lookup that can return NULL (for example
// if the first pass failed); confirm it is checked before being dereferenced.
679 void CSignatureCollection::ReadSignatureFileBis(QString Filename
) { try
681 CStemCollection
* stems_ptr
= m_pMiniLex
->GetStems();
684 CStemCollection
& stems
= *stems_ptr
;
686 QFile
file(Filename
);
687 if (!file
.open(QIODevice::ReadOnly
))
689 QTextStream
inf(&file
);
691 const int signature_count
= string_to_int(
692 get_line(inf
).trimmed());
694 for (int signo
= 0; signo
< signature_count
; ++signo
) {
695 // see ReadSignatureFile().
696 QString sig_header
= get_line(inf
).trimmed();
697 QTextStream
line_in(&sig_header
, QIODevice::ReadOnly
);
698 QString sig_graphemes
, stem_count_text
,
700 line_in
>> sig_graphemes
>>
701 stem_count_text
>> corpus_count_text
;
702 check_end_of_line(line_in
);
704 const QString sig_text
= Filter(m_pLexicon
->GetInFilter(),
706 const int stem_count
= string_to_int(stem_count_text
);
709 sig_parse
.IngestSignature(sig_text
);
710 CSignature
* sig
= *this ^= sig_parse
;
713 for (int stemno
= 0; stemno
< stem_count
; ++stemno
) {
714 QString stem_graphemes
;
715 inf
>> stem_graphemes
;
717 const QString stem_text
= Filter(
718 m_pLexicon
->GetInFilter(), stem_graphemes
);
720 CStem
* stem
= stems
^= stem_text
;
722 // XXX. stem missing from Stems.txt
725 sig
->AppendStemPtr(stem
);
728 if (is_initial(sig
->GetAffixLocation())) {
729 for (int stemno
= 0; stemno
< sig
->GetNumberOfStems(); stemno
++)
731 CStem
* stem
= sig
->GetStem(stemno
);
732 for (int affixno
= 1; affixno
<= sig
->Size(); ++affixno
) {
734 *m_pMiniLex
->GetPrefixes() ^=
735 sig
->GetPiece(affixno
);
737 throw cannot_parse_input();
738 affix
->AddStem(stem
);
742 for (int stemno
= 0; stemno
< sig
->GetNumberOfStems(); stemno
++)
744 CStem
* stem
= sig
->GetStem(stemno
);
745 for (int affixno
= 1; affixno
<= sig
->Size(); ++affixno
) {
747 *m_pMiniLex
->GetSuffixes() ^=
748 sig
->GetPiece(affixno
);
750 throw cannot_parse_input();
751 affix
->AddStem(stem
);
757 } catch (cannot_parse_input
) {
758 // XXX. report to user
759 std::cerr
<< "Signature.txt: cannot re-parse" << std::endl
;
// Propagates robustness from containing signatures to contained ones.
// For each signature after the first (in sort order), the signatures sorted
// before it are scanned; when one of them Contains() it, that signature's
// robustness value is copied onto it. The scan is quadratic in the number of
// signatures. Progress is reported through the lexicon's status display.
763 void CSignatureCollection::CheckRobustness()
765 CLexicon
& lex
= *m_pLexicon
;
766 linguistica::ui::status_user_agent
& status
= lex
.status_display();
768 status
.major_operation
= "Checking sig robustness";
769 status
.progress
.clear();
771 status
.progress
.set_denominator(GetCount());
772 for (int signo
= 1; signo
< (int)GetCount(); signo
++) {
773 CSignature
* pSig
= GetAtSort(signo
);
774 status
.progress
= signo
;
775 for (int signo2
= 0; signo2
< signo
; signo2
++) {
776 CSignature
* qSig
= GetAtSort(signo2
);
777 if (qSig
->Contains(pSig
)) {
778 pSig
->SetRobustness(qSig
->GetRobustness());
783 status
.progress
.clear();
785 // XXX. not an operation
786 status
.major_operation
= "Robustness checking complete.";
// Sums (number of stems x number of affixes) over all member signatures.
789 int CSignatureCollection::GetTotalNumberOfWords()
792 for (int signo
= 0; signo
< (int)GetCount(); signo
++)
794 Total
+= GetAt(signo
)->GetNumberOfStems() * GetAt(signo
)->Size();
// Adds up the stem counts of every signature that contains both suffixes.
800 int CSignatureCollection::TheseTwoSuffixesShareHowManyStems(CSuffix
* pSuffix1
, CSuffix
* pSuffix2
)
804 for (int signo
= 0; signo
< (int)GetCount(); signo
++)
807 if ( pSig
->Contains (pSuffix1
) && pSig
->Contains (pSuffix2
) )
809 count
+= pSig
->GetNumberOfStems();
// Scans the members for signatures that have no stems or no corpus count.
// Per the -cs- notes below, such members are removed directly rather than
// being marked for DeleteMarkedMembers().
// NOTE(review): if members are removed while this forward index loop runs,
// the element following each removal may be skipped -- confirm how the loop
// index is adjusted.
815 void CSignatureCollection::CleanUp()
819 for (int signo
= 0; signo
< (int) GetCount(); signo
++)
822 if ( pSig
->GetNumberOfStems() <= 0 || pSig
->GetCorpusCount() <= 0 ) // -cs- 20040906 : added the second argument
824 // -cs- 20040602 : DeleteMarkedMembers wasn't actually finding any of the
825 // members to be to be deleted, so I changed it to remove them automatically,
826 // this fixed our word display bug (words weren't connected to their signature
// Adds an existing signature through the base-class AddPointer(), then
// registers it in the lexicon's prefix- or suffix-signature index according
// to m_SignatureType.
832 void CSignatureCollection::AddPointer( CSignature
* pSignature
)
834 TCollection
<CSignature
>::AddPointer( pSignature
);
838 switch( m_SignatureType
)
842 m_pLexicon
->InsertPrefixSig( pSignature
);
847 m_pLexicon
->InsertSuffixSig( pSignature
);
// Adds a signature given as a parse through the base-class AddToCollection(),
// then registers the resulting signature in the lexicon's prefix- or
// suffix-signature index according to m_SignatureType.
853 CSignature
* CSignatureCollection::AddToCollection( CParse
& Signature
)
855 CSignature
* pSignature
= TCollection
<CSignature
>::AddToCollection( Signature
);
859 switch( m_SignatureType
)
863 m_pLexicon
->InsertPrefixSig( pSignature
);
868 m_pLexicon
->InsertSuffixSig( pSignature
);
// Adds a signature given as a string surrogate through the base-class
// AddToCollection(), then registers the resulting signature in the lexicon's
// prefix- or suffix-signature index according to m_SignatureType.
876 CSignature
* CSignatureCollection::AddToCollection( CStringSurrogate
& Signature
)
878 CSignature
* pSignature
= TCollection
<CSignature
>::AddToCollection( Signature
);
882 switch( m_SignatureType
)
886 m_pLexicon
->InsertPrefixSig( pSignature
);
891 m_pLexicon
->InsertSuffixSig( pSignature
);
// Unregisters every member from the lexicon's prefix/suffix signature index,
// then empties the collection through the base class.
// NOTE(review): the Remove*Sig calls are written inside Q_ASSERT. Q_ASSERT
// expands to nothing when QT_NO_DEBUG is defined, so in such builds the
// removal would not be executed at all. The call should be made
// unconditionally and only its result asserted.
899 void CSignatureCollection::Empty()
903 CSignature
* pSignature
;
905 for( int signo
= 0; signo
< GetCount(); signo
++ )
907 pSignature
= GetAt(signo
);
909 switch( m_SignatureType
)
913 Q_ASSERT( m_pLexicon
->RemovePrefixSig( pSignature
) );
918 Q_ASSERT( m_pLexicon
->RemoveSuffixSig( pSignature
) );
923 TCollection
<CSignature
>::Empty();
// Unregisters every member from the lexicon's prefix/suffix signature index,
// then calls the base-class RemoveAll().
// NOTE(review): the Remove*Sig calls are written inside Q_ASSERT. Q_ASSERT
// expands to nothing when QT_NO_DEBUG is defined, so in such builds the
// removal would not be executed at all.
927 void CSignatureCollection::RemoveAll()
931 CSignature
* pSignature
;
933 for( int signo
= 0; signo
< GetCount(); signo
++ )
935 pSignature
= GetAt(signo
);
937 switch( m_SignatureType
)
941 Q_ASSERT( m_pLexicon
->RemovePrefixSig( pSignature
) );
946 Q_ASSERT( m_pLexicon
->RemoveSuffixSig( pSignature
) );
951 TCollection
<CSignature
>::RemoveAll();
// Unregisters one signature from the lexicon's prefix/suffix signature index
// and returns the result of the base-class Remove().
// NOTE(review): the Remove*Sig calls are written inside Q_ASSERT. Q_ASSERT
// expands to nothing when QT_NO_DEBUG is defined, so in such builds the
// removal would not be executed at all.
955 bool CSignatureCollection::Remove( CSignature
* pSignature
)
960 switch( m_SignatureType
)
964 Q_ASSERT( m_pLexicon
->RemovePrefixSig( pSignature
) );
969 Q_ASSERT( m_pLexicon
->RemoveSuffixSig( pSignature
) );
974 return TCollection
<CSignature
>::Remove( pSignature
);
// Unregisters one signature from the lexicon's prefix/suffix signature index
// and returns the result of the base-class RemoveMember().
// NOTE(review): the Remove*Sig calls are written inside Q_ASSERT. Q_ASSERT
// expands to nothing when QT_NO_DEBUG is defined, so in such builds the
// removal would not be executed at all.
978 bool CSignatureCollection::RemoveMember( CSignature
* pSignature
)
983 switch( m_SignatureType
)
987 Q_ASSERT( m_pLexicon
->RemovePrefixSig( pSignature
) );
992 Q_ASSERT( m_pLexicon
->RemoveSuffixSig( pSignature
) );
996 return TCollection
<CSignature
>::RemoveMember( pSignature
);
// Removes the signature stored under the given key: looks it up with Find1(),
// unregisters it from the lexicon's prefix/suffix signature index, and
// returns the result of the base-class RemoveMember().
// NOTE(review): the result of Find1() is dereferenced without a null check,
// although the lookup operators in this file treat a missing node as a
// normal outcome.
// NOTE(review): the prefix branch calls RemovePrefixSig directly while the
// suffix branch wraps RemoveSuffixSig in Q_ASSERT, which expands to nothing
// when QT_NO_DEBUG is defined.
1000 bool CSignatureCollection::RemoveMember( CStringSurrogate
& Signature
)
1002 CSignature
* pSignature
= (CSignature
*)Find1( Signature
)->Get_T_Pointer();
1006 switch( m_SignatureType
)
1010 m_pLexicon
->RemovePrefixSig( pSignature
) ;
1015 Q_ASSERT( m_pLexicon
->RemoveSuffixSig( pSignature
) );
1019 return TCollection
<CSignature
>::RemoveMember( Signature
);
// Same as the one-argument key overload, but forwards the extra flag `b` to
// the base-class RemoveMember(). Both Remove*Sig calls are made directly here
// (no Q_ASSERT wrapper).
// NOTE(review): the result of Find1() is dereferenced without a null check.
1023 bool CSignatureCollection::RemoveMember( CStringSurrogate
& Signature
, bool b
)
1025 CSignature
* pSignature
= (CSignature
*)Find1( Signature
)->Get_T_Pointer();
1029 switch( m_SignatureType
)
1033 m_pLexicon
->RemovePrefixSig( pSignature
) ;
1038 m_pLexicon
->RemoveSuffixSig( pSignature
) ;
1042 return TCollection
<CSignature
>::RemoveMember( Signature
, b
);
// Unregisters every member flagged in m_DeletionArray (value 1) from the
// lexicon's prefix/suffix signature index, then lets the base class delete
// the marked members. Does nothing when there is no deletion array.
1046 void CSignatureCollection::DeleteMarkedMembers()
1048 if ( m_DeletionArray
== NULL
) { return; }
1050 int count
= GetCount();
1051 for (int signo
= 0; signo
< count
; signo
++)
1053 if ( m_DeletionArray
[signo
] == 1 )
1057 switch( m_SignatureType
)
1061 m_pLexicon
->RemovePrefixSig( m_PointerArray
[signo
] );
1066 m_pLexicon
->RemoveSuffixSig( m_PointerArray
[signo
] );
1072 TCollection
<CSignature
>::DeleteMarkedMembers();
// Visits every member signature in storage order.
// NOTE(review): presumably each signature is asked to compute its per-stem
// counts -- confirm against the per-signature call.
1076 void CSignatureCollection::GetIndividualCountsForEachStem ()
1079 for (int signo
= 0; signo
< GetCount(); signo
++)
1081 pSig
= GetAt(signo
);
// Recomputes m_SumOfDLofPointersInternalToEachMember as the sum of
// GetSumOfDLofInternalPointers() over all members and returns it.
// The style argument is accepted for interface compatibility but not used.
1086 double CSignatureCollection::ComputeDLofInternalPointersOfEachMember(
1087 enum eMDL_STYLE
/*unused*/)
1089 m_SumOfDLofPointersInternalToEachMember
= 0;
1090 for (int signo
= 0; signo
< GetCount(); ++signo
)
1092 CSignature
* sig
= GetAt(signo
);
1093 m_SumOfDLofPointersInternalToEachMember
+=
1094 sig
->GetSumOfDLofInternalPointers();
1096 return m_SumOfDLofPointersInternalToEachMember
;
1099 // MDL JG August 2006
1100 double CSignatureCollection::ComputeLengthOfPointersToEachOfMyMembers (eMDL_STYLE style
)
1102 double Denominator
= 0;
1104 int m_DLofPointersToMyMembers
= 0;
1107 if (style
== CorpusCount
)
1109 for (int signo
= 0; signo
< GetCount(); signo
++)
1111 Denominator
+= GetAt(signo
)->GetCorpusCount();
1113 for (int signo
= 0; signo
< GetCount(); signo
++)
1115 ptr
= base2log ( Denominator
/ GetAt(signo
)->GetCorpusCount() );
1116 GetAt(signo
)->SetLengthOfPointerToMe (ptr
) ;
1117 m_DLofPointersToMyMembers
+= ptr
;
1121 else if (style
== GrammarCount
)
1123 for (int signo
= 0; signo
< GetCount(); signo
++)
1125 Denominator
+= GetAt(signo
)->GetNumberOfStems() * GetAt(signo
)->GetNumberOfAffixes();
1127 for (int signo
= 0; signo
< GetCount(); signo
++)
1129 ptr
= base2log ( Denominator
/ GetAt(signo
)->GetCorpusCount() );
1130 GetAt(signo
)->SetLengthOfPointerToMe (ptr
) ;
1131 m_DLofPointersToMyMembers
+= ptr
;
1134 return m_DLofPointersToMyMembers
;
1136 ///----------------------------------------------------------->>>>>
1137 ///----------------------------------------------------------->>>>>
1139 ///----------------------------------------------------------->>>>>
1140 void CSignatureCollection::FindAllomorphy()
1142 SignatureAlignment
* pSigAlignment
;
1144 GetMiniLexicon()->LogFileLargeTitle("Allomorphy");
1145 CSignature
*pSig
, *qSig
=NULL
;
1146 int MinimumNumberOfStems
= 15;
1147 for (int signo
=0; signo
< GetCount(); signo
++)
1149 pSig
= GetAtSort(signo
);
1150 if (pSig
->GetNumberOfStems() < MinimumNumberOfStems
) {continue;}
1151 if (pSig
->Size() < 2 ) {continue;}
1153 for (int signo2
= signo
+1; signo2
< GetCount(); signo2
++)
1155 qSig
= GetAtSort(signo2
);
1157 if (qSig
->GetNumberOfStems() < MinimumNumberOfStems
) {continue;}
1158 if (qSig
->Size() < 2 ) {continue;}
1160 pSigAlignment
= new SignatureAlignment (pSig
, qSig
);
1161 pSigAlignment
->FindBestAlignment();
1162 if (GetMiniLexicon()->LogFileOn()
1163 && pSigAlignment
->GetAffixAlignments()->count() > 1)
1164 { pSigAlignment
->Display( *GetMiniLexicon()->GetLogFile()); }