HowManyAreAnalyzed(): use status_user_agent to report progress
[linguistica.git] / SignatureCollection.cpp
blob33475c87349e0d481b289f0d8ae548cf1660471a
1 // Implementation of CSignatureCollection methods
2 // Copyright © 2009 The University of Chicago
3 #include "SignatureCollection.h"
5 #include <exception>
6 #include <iostream>
7 #include <memory>
8 #include <QTextStream>
9 #include <QMessageBox>
10 #include <QIODevice>
11 #include <QFile>
12 #include "ui/Status.h"
13 #include "MiniLexicon.h"
14 #include "Lexicon.h"
15 #include "Allomorphy.h"
16 #include "Signature.h"
17 #include "Prefix.h"
18 #include "Suffix.h"
19 #include "Stem.h"
20 #include "PrefixCollection.h"
21 #include "SuffixCollection.h"
22 #include "StemCollection.h"
23 #include "HTML.h"
24 #include "log2.h"
25 // <<-------------------------------------------------------------------------------------------------------->>
26 CSignatureCollection::CSignatureCollection()
28 m_pMiniLex = NULL;
29 m_pLexicon = NULL;
30 m_MemberName = "Signatures";
31 MySuffixes = NULL;
32 MyPrefixes = NULL;
33 m_DLofPointersToMyMembers =0;
35 CSignatureCollection::CSignatureCollection( CMiniLexicon* Lex )
37 m_pMiniLex = Lex;
38 if( m_pMiniLex ) m_pLexicon = m_pMiniLex->GetLexicon();
39 m_MemberName = "Signatures";
40 m_SignatureType = m_pMiniLex->GetAffixLocation();
41 MySuffixes = NULL;
42 MyPrefixes = NULL;
43 m_DLofPointersToMyMembers =0;
46 CSignatureCollection::CSignatureCollection (CMiniLexicon* Lex, CSuffixCollection* suffixes, eAffixLocation AfLoc)
48 m_pMiniLex = Lex;
49 if( m_pMiniLex ) m_pLexicon = m_pMiniLex->GetLexicon();
50 m_MemberName = "Signatures";
51 MySuffixes = suffixes;
52 m_SignatureType = AfLoc;
53 MyPrefixes = NULL;
54 m_DLofPointersToMyMembers =0;
57 CSignatureCollection::CSignatureCollection (CMiniLexicon* Lex, CPrefixCollection* Prefixes, eAffixLocation AfLoc)
59 m_pMiniLex = Lex;
60 if( m_pMiniLex ) m_pLexicon = m_pMiniLex->GetLexicon();
61 m_MemberName = "Signatures";
62 MyPrefixes = Prefixes;
63 m_SignatureType = AfLoc;
64 MySuffixes = NULL;
65 m_DLofPointersToMyMembers =0;
68 CSignatureCollection::CSignatureCollection (eAffixLocation SigType)
70 m_pMiniLex = NULL;
71 m_pLexicon = NULL;
72 m_SignatureType = SigType;
73 m_MemberName = "Signatures";
74 MySuffixes = NULL;
75 MyPrefixes = NULL;
76 m_DLofPointersToMyMembers =0;
79 // <<-------------------------------------------------------------------------------------------------------->>
80 CSignatureCollection::~CSignatureCollection()
82 if( m_pLexicon )
84 CSignature* pSig;
86 for( int signo = 0; signo < GetCount(); signo++ )
88 pSig = GetAt(signo);
89 if( m_pLexicon )
91 switch( m_SignatureType )
93 case STEM_INITIAL:
94 case WORD_INITIAL:
95 m_pLexicon->RemovePrefixSig( pSig );
96 break;
97 case WORD_FINAL:
98 case STEM_FINAL:
99 default:
100 m_pLexicon->RemoveSuffixSig( pSig );
107 //==============================================================================================//
110 // Operators
112 //==============================================================================================//
114 CSignature* CSignatureCollection::operator^= (QString Signature) //" Lookup"
116 if( Signature.length() < 1 ) return NULL;
118 CNode *pNode = Find1 ( CStringSurrogate( Signature.unicode(),0,Signature.length() ) );
119 if ( pNode )
121 return (CSignature*) pNode->Get_T_Pointer();
123 else { return NULL; }
126 CSignature* CSignatureCollection::operator^= (CParse& Parse) //" Lookup"
128 // Return NULL if parse is empty.
129 if( Parse.GetKeyLength() < 1 || Parse.Size() < 1 ) return NULL;
131 Parse.Alphabetize();
133 QString display = Parse.Display('.');
134 CNode *pNode = Find1 ( CStringSurrogate( display.unicode(),0,display.length() ) );
135 if ( pNode )
137 return (CSignature*) pNode->Get_T_Pointer();
139 else { return NULL; }
142 // <<-------------------------------------------------------------------------------------------------------->>
143 CSignature* CSignatureCollection::operator^= (CParse* pParse) //" Lookup"
145 // Return NULL if parse is empty.
146 if( pParse->GetKeyLength() < 1 || pParse->Size() < 1 ) return NULL;
148 pParse->Alphabetize();
150 QString display = pParse->Display('.');
151 CNode *pNode = Find1 ( CStringSurrogate( display.unicode(),0,display.length() ) );
152 if ( pNode )
154 return (CSignature*) pNode->Get_T_Pointer();
156 else { return NULL; }
158 // <<-------------------------------------------------------------------------------------------------------->>
159 CSignature* CSignatureCollection::operator^= (CStringSurrogate& Signature) //" Lookup"
161 // Return NULL if Signature is empty.
162 if( Signature.GetLength() < 1 ) return NULL;
164 CNode *pNode = Find1 ( Signature );
165 if ( pNode )
167 return (CSignature*) pNode->Get_T_Pointer();
169 else { return NULL; }
173 // <<-------------------------------------------------------------------------------------------------------->>
174 CSignature* CSignatureCollection::operator<< (CParse* pParse)
176 CSignature* pSig;
177 CNode* pTerminal;
178 int Result;
179 CSuffix* pSuffix;
180 CPrefix* pPrefix;
182 pParse->Alphabetize(); // Jan 2009 JG
184 QString display = pParse->Display('.');
185 CParse SpelledOutSig = CStringSurrogate(display.unicode(),0,display.length());
188 pTerminal = Insert (SpelledOutSig.GetKey(), &Result);
189 if ( Result == 1)
191 pSig = new CSignature( pParse, m_pMiniLex );
192 pTerminal->SetPointer( pSig );
193 if (m_pMiniLex)
195 switch (m_SignatureType)
197 case (WORD_INITIAL):
198 case (STEM_INITIAL):
199 for (int affixno = 1; affixno <= pParse->Size(); affixno++)
201 pPrefix = *m_pMiniLex->GetPrefixes() ^= pParse->GetPiece(affixno);
202 if(pPrefix)
204 pSig->AppendPrefixPtr( pPrefix );
207 break;
208 case (WORD_FINAL):
209 case (STEM_FINAL):
210 default:
211 for (int affixno = 1; affixno <= pParse->Size(); affixno++)
213 pSuffix = *m_pMiniLex->GetSuffixes() ^= pParse->GetPiece(affixno);
214 if(pSuffix)
216 pSig->AppendSuffixPtr( pSuffix );
222 else
224 pSig =(CSignature*) pTerminal->Get_T_Pointer();
227 if( m_pLexicon )
229 switch( m_SignatureType )
231 case STEM_INITIAL:
232 case WORD_INITIAL:
233 m_pLexicon->InsertPrefixSig( pSig );
234 break;
235 case WORD_FINAL:
236 case STEM_FINAL:
237 default:
238 m_pLexicon->InsertSuffixSig( pSig );
242 IncrementCorpusCount(1);
243 pSig->IncrementCorpusCount(1);
245 m_SortValidFlag = FALSE;
246 m_HashHasChangedFlag = TRUE;
248 pSig->SetLexicon( m_pMiniLex );
249 pSig->SetSignatureCollection ( this );
251 //todo
252 pSig->SetAffixLocation ( m_SignatureType );
254 return pSig;
259 CSignature* CSignatureCollection::operator<< (CSignature* Sig)
261 CSignature* pSig = NULL;
262 CNode* pTerminal;
263 int Result = 0;
264 int affixno;
266 Sig ->Alphabetize();
267 CParse SpelledOutSig = CStringSurrogate(Sig->Display('.').unicode(),0,Sig->Display('.').length());
270 pTerminal = Insert (SpelledOutSig.GetKey(), &Result);// CAUSED PROBLEM!!!!!!
272 if ( Result == 1)
274 pSig = new CSignature(*Sig);
275 pTerminal->SetPointer (pSig);
276 if (m_pMiniLex)
278 switch (m_SignatureType)
280 case WORD_INITIAL:
281 case STEM_INITIAL:
283 for ( affixno = 1; affixno <= Sig->Size(); affixno++)
285 CPrefix* pPrefix = *m_pMiniLex->GetPrefixes() ^= Sig->GetPiece(affixno);
286 if(pPrefix) {
287 pSig->AppendPrefixPtr( pPrefix );
291 break;
292 case WORD_FINAL:
293 case STEM_FINAL:
294 default:
296 for ( affixno = 1; affixno <= Sig->Size(); affixno++)
298 CSuffix* pSuffix = *m_pMiniLex->GetSuffixes() ^= Sig->GetPiece(affixno);
299 if(pSuffix) {
300 pSig->AppendSuffixPtr( pSuffix );
307 else
309 pSig =(CSignature*) pTerminal->Get_T_Pointer();
313 if( m_pLexicon )
315 switch( m_SignatureType ) {
316 case STEM_INITIAL:
317 case WORD_INITIAL:
318 m_pLexicon->InsertPrefixSig( pSig );
319 break;
320 case WORD_FINAL:
321 case STEM_FINAL:
322 default:
323 m_pLexicon->InsertSuffixSig( pSig );
326 IncrementCorpusCount(1);
327 pSig ->IncrementCorpusCount(1);
328 m_SortValidFlag = FALSE;
329 m_HashHasChangedFlag = TRUE;
330 pSig->SetLexicon( m_pMiniLex );
331 pSig->SetSignatureCollection ( this );
332 //todo
333 pSig ->SetAffixLocation ( m_SignatureType );
334 return pSig;
338 //==============================================================================================//
343 //==============================================================================================//
344 void CSignatureCollection::SetMyPrefixes(CPrefixCollection* pAC){ MyPrefixes = pAC;}
345 void CSignatureCollection::SetMySuffixes(CSuffixCollection* pAC){ MySuffixes = pAC;}
348 void CSignatureCollection::FindDisplayOrdering()
350 int Size = GetCount();
351 CSignature* pSig,
352 *qSig;
355 Sort (SIGS);
357 for (int signo = 0; signo < Size; signo++)
359 pSig = GetAtSort(signo); // We're looking for pSig's mentor, if it has one
360 if (pSig->Size() < 2) continue;
361 for (int signo2 = 0; signo2 < signo; signo2++)
363 qSig = GetAtSort(signo2);
364 if ( qSig->Contains(pSig) )
366 pSig->SetMentor (qSig);
367 break;
369 else pSig->SetMentor( NULL );
373 m_SortStyle = SIG_MENTORS;
374 m_SortValidFlag = TRUE;
377 void CSignatureCollection::ListDisplay(
378 Q3ListView* pView, QMap<QString, QString>* filter)
380 CLexicon& lex = *m_pLexicon;
381 linguistica::ui::status_user_agent& status = lex.status_display();
383 // XXX. make these adjustable by user.
384 int MinimumNumberOfStemsForDisplay = 2;
385 int MinimumNumberOfAffixesForDisplay = 2;
387 if (GetCount() < 20)
388 MinimumNumberOfStemsForDisplay = 1;
390 pView->setSorting(6);
392 // Remove all previous columns
393 while (pView->columns() != 0)
394 pView->removeColumn(0);
395 pView->clear();
397 // Add Column headers
398 pView->addColumn("Signatures");
399 pView->addColumn("Exemplar");
400 pView->addColumn("Descr. Length", 100);
401 pView->addColumn("Corpus Count", 100);
402 pView->addColumn("Stem Count", 100);
403 pView->addColumn("Source");
404 pView->addColumn("Robustness");
406 pView->setColumnAlignment(0, Qt::AlignLeft);
407 pView->setColumnAlignment(1, Qt::AlignCenter);
408 pView->setColumnAlignment(2, Qt::AlignRight);
409 pView->setColumnAlignment(3, Qt::AlignCenter);
410 pView->setColumnAlignment(4, Qt::AlignCenter);
411 pView->setColumnAlignment(5, Qt::AlignCenter);
412 pView->setColumnAlignment(6, Qt::AlignCenter);
414 status.major_operation = "Creating signature list for display";
415 status.progress.clear();
416 FindDisplayOrdering();
417 status.progress.set_denominator(GetCount()-1);
418 for (int signo = GetCount()-1; signo >=0 ; signo--) {
419 CSignature* pSig = GetAtSort(signo);
420 status.progress = GetCount()-1 - signo;
421 if (pSig->GetMentor())
422 continue;
423 if (pSig->GetNumberOfStems() < MinimumNumberOfStemsForDisplay)
424 continue;
425 if (pSig->Size() < MinimumNumberOfAffixesForDisplay)
426 continue;
428 CSignatureListViewItem* item = new CSignatureListViewItem(
429 pView, pSig->Express(), m_pMiniLex->GetIndex(), pSig, filter);
430 if (pSig->GetMentorList()) {
431 for (int signo2 = 0; signo2 < pSig->GetMentorList()->size(); signo2++) {
432 CSignature* qSig = pSig->GetMentorList()->at(signo2);
433 if (qSig->GetNumberOfStems() < MinimumNumberOfStemsForDisplay)
434 continue;
435 static_cast<void>(new CSignatureListViewItem(
436 item, qSig->Display(), m_pMiniLex->GetIndex(), qSig, filter));
438 item->setOpen(true);
441 status.progress.clear();
442 status.major_operation.clear();
445 void CSignatureCollection::BorrowedSigsDisplay(
446 Q3ListView* pView, QMap<QString, QString>* filter)
448 CLexicon& lex = *m_pLexicon;
449 linguistica::ui::status_user_agent& status = lex.status_display();
451 // Remove all previous columns
452 while (pView->columns() != 0)
453 pView->removeColumn(0);
455 // Add Column headers
456 pView->addColumn("Signatures");
457 pView->addColumn("Source");
459 // Display each item
460 status.major_operation = "Creating signature list for display";
461 status.progress.clear();
462 status.progress.set_denominator(GetCount());
463 for (int signo = 0; signo < (int)GetCount(); signo++) {
464 GetAt(signo)->BorrowedSigsDisplay(pView, filter);
465 status.progress = signo;
467 status.progress.clear();
468 status.major_operation.clear();
471 ////////////////////////////////////////////////////
472 ////////////////////////////////////////////////////
475 // Verbose Output
478 ////////////////////////////////////////////////////
479 ////////////////////////////////////////////////////
482 void CSignatureCollection::OutputSignatures( QString FileName )
484 QFile file( FileName );
486 if( file.open( QIODevice::WriteOnly ) )
488 QTextStream outf( &file );
489 outf.setEncoding( QTextStream::Unicode );
491 outf << "# Signature Count" << endl;
492 outf << "# ---------------" << endl;
493 outf << " " << GetCount() << " signatures" << endl << endl;
496 Sort( CORPUSCOUNT );
497 for (int i = 0; i < (int)GetCount(); i++)
499 GetAtSort(i)->OutputSignature( outf );
502 file.close();
506 void CSignatureCollection::OutputXfst( QString FileName )
508 QFile file( FileName );
510 if( file.open( IO_WriteOnly ) )
512 QTextStream outf( &file ); //Should be ascii file, not unicode
514 outf << "# " << endl;
515 outf << "# File: " << FileName << endl;
516 outf << "# Signature count: " << GetCount() << endl;
517 outf << "# " << endl;
518 // Sort( CORPUSCOUNT );
519 Sort( SIG_MENTORS );
520 for (int i = 0; i < (int)GetCount(); i++)
522 GetAtSort(i)->OutputSignatureXfst( outf, i+1 );
525 outf << endl;
526 outf << "union net" << endl << endl;
527 outf << "print words" << endl << endl;
529 file.close();
535 ////////////////////////////////////////////////////
536 ////////////////////////////////////////////////////
539 // Limited Output
542 ////////////////////////////////////////////////////
543 ////////////////////////////////////////////////////
545 /*void CSignatureCollection::LimitedOutput (QString Filename)
547 QFile file( Filename );
549 if( file.open( IO_WriteOnly ) )
551 QTextStream outf( &file );
552 int TotalWordCount = 0;
553 CSignature* pSig;
554 QString dummy;
555 int i = 0,
556 counter = 0;
557 int NumEntries = GetCount();
559 outf.setf(2); // Set fields left justified
560 outf << "# Index Signature StemCount AffixCount log(StemCount)*log(AffixCount)" << endl << endl;
562 Sort(SIGS);
563 for( i = 0; i < NumEntries; i++ )
565 pSig = GetAtSort(i);
567 outf << ++counter << " ";
568 outf << pSig -> Display( '.', m_pLexicon->GetOutFilter() ) << " ";
569 outf << pSig -> GetStems().Size() << " ";
570 outf << pSig->GetNumberOfAffixes()
571 << " " << log( pSig->GetStems().Size() ) * log ( pSig->GetNumberOfAffixes() )
572 << endl;
574 TotalWordCount += pSig->GetStemPtrList()->count() * pSig->GetNumberOfAffixes();
577 outf << endl << "Total number of words covered: " << TotalWordCount;
579 file.close();
581 return;
584 namespace {
585 struct cannot_parse_input : virtual std::exception { };
587 /// skip blank lines and comments
588 QString get_line(QTextStream& in)
590 QString buf;
591 do {
592 buf = in.readLine();
593 } while (buf.isEmpty() || buf[0] == '#');
594 return buf;
597 /// swallow end of line, throwing an exception if that involves
598 /// useful data.
599 void check_end_of_line(QTextStream& in)
601 QString remainder = in.readLine();
602 if (!remainder.isEmpty())
603 throw cannot_parse_input();
606 int string_to_int(QString s)
608 bool ok;
609 int result = s.toInt(&ok);
610 if (!ok)
611 throw cannot_parse_input();
612 return result;
616 void CSignatureCollection::ReadSignatureFile(QString Filename,
617 enum eAffixLocation SigType) { try
619 QFile file(Filename);
620 if (!file.open(QIODevice::ReadOnly))
621 return;
623 QTextStream inf(&file);
625 const int signature_count = string_to_int(
626 get_line(inf).trimmed());
628 delete[] m_PointerArray;
629 m_PointerArray = new CSignature*[signature_count];
631 for (int count = 1; count <= signature_count; ++count) {
632 QString sig_header = get_line(inf).trimmed();
634 // line 1:
635 // SP+ signature SP+ stem count SP+ corpus count SP+
636 QTextStream line_in(&sig_header, QIODevice::ReadOnly);
637 QString sig_graphemes, stem_count_text,
638 corpus_count_text;
639 line_in >> sig_graphemes >>
640 stem_count_text >> corpus_count_text;
641 check_end_of_line(line_in);
643 const QString sig_text = Filter(m_pLexicon->GetInFilter(),
644 sig_graphemes);
645 const int stem_count = string_to_int(stem_count_text);
646 const int corpus_count = string_to_int(corpus_count_text);
648 // line 2: signature origin
649 QString remark = get_line(inf).trimmed();
650 remark.replace(QChar('_'), QChar(' '));
652 std::auto_ptr<CSignature> sig(new CSignature(
653 SigType, m_pMiniLex));
654 sig->IngestSignature(sig_text);
655 sig->SetCorpusCount(corpus_count);
656 sig->SetRemark(remark);
657 sig->SetSignatureCollection(this);
659 for (int i = 0; i < stem_count; ++i) {
660 QString stem;
661 inf >> stem;
662 // We haven’t read the Stems.txt file
663 // yet, so just swallow each stem here.
664 // The stems will be read from Signatures.txt
665 // when it is read again in
666 // ReadSignatureFileBis.
669 CNode* terminal = Insert(sig_text);
670 m_PointerArray[GetCount() - 1] = sig.get();
671 terminal->SetPointer(sig.release());
673 } catch (cannot_parse_input) {
674 // XXX. report to user
675 std::cerr << "Signature.txt: cannot parse" << std::endl;
679 void CSignatureCollection::ReadSignatureFileBis(QString Filename) { try
681 CStemCollection* stems_ptr = m_pMiniLex->GetStems();
682 if (stems_ptr == 0)
683 return;
684 CStemCollection& stems = *stems_ptr;
686 QFile file(Filename);
687 if (!file.open(QIODevice::ReadOnly))
688 return;
689 QTextStream inf(&file);
691 const int signature_count = string_to_int(
692 get_line(inf).trimmed());
694 for (int signo = 0; signo < signature_count; ++signo) {
695 // see ReadSignatureFile().
696 QString sig_header = get_line(inf).trimmed();
697 QTextStream line_in(&sig_header, QIODevice::ReadOnly);
698 QString sig_graphemes, stem_count_text,
699 corpus_count_text;
700 line_in >> sig_graphemes >>
701 stem_count_text >> corpus_count_text;
702 check_end_of_line(line_in);
704 const QString sig_text = Filter(m_pLexicon->GetInFilter(),
705 sig_graphemes);
706 const int stem_count = string_to_int(stem_count_text);
708 CParse sig_parse;
709 sig_parse.IngestSignature(sig_text);
710 CSignature* sig = *this ^= sig_parse;
711 Q_ASSERT(sig != 0);
713 for (int stemno = 0; stemno < stem_count; ++stemno) {
714 QString stem_graphemes;
715 inf >> stem_graphemes;
717 const QString stem_text = Filter(
718 m_pLexicon->GetInFilter(), stem_graphemes);
720 CStem* stem = stems ^= stem_text;
721 if (stem == 0)
722 // XXX. stem missing from Stems.txt
723 continue;
725 sig->AppendStemPtr(stem);
728 if (is_initial(sig->GetAffixLocation())) {
729 for (int stemno = 0; stemno < sig->GetNumberOfStems(); stemno++)
731 CStem* stem = sig->GetStem(stemno);
732 for (int affixno = 1; affixno <= sig->Size(); ++affixno) {
733 CPrefix* affix =
734 *m_pMiniLex->GetPrefixes() ^=
735 sig->GetPiece(affixno);
736 if (affix == 0)
737 throw cannot_parse_input();
738 affix->AddStem(stem);
741 } else {
742 for (int stemno = 0; stemno < sig->GetNumberOfStems(); stemno++)
744 CStem* stem = sig->GetStem(stemno);
745 for (int affixno = 1; affixno <= sig->Size(); ++affixno) {
746 CSuffix* affix =
747 *m_pMiniLex->GetSuffixes() ^=
748 sig->GetPiece(affixno);
749 if (affix == 0)
750 throw cannot_parse_input();
751 affix->AddStem(stem);
756 Sort(SIGS);
757 } catch (cannot_parse_input) {
758 // XXX. report to user
759 std::cerr << "Signature.txt: cannot re-parse" << std::endl;
763 void CSignatureCollection::CheckRobustness()
765 CLexicon& lex = *m_pLexicon;
766 linguistica::ui::status_user_agent& status = lex.status_display();
768 status.major_operation = "Checking sig robustness";
769 status.progress.clear();
770 Sort(SIGS);
771 status.progress.set_denominator(GetCount());
772 for (int signo = 1; signo < (int)GetCount(); signo++) {
773 CSignature* pSig = GetAtSort(signo);
774 status.progress = signo;
775 for (int signo2 = 0; signo2 < signo; signo2++) {
776 CSignature* qSig = GetAtSort(signo2);
777 if (qSig->Contains(pSig)) {
778 pSig->SetRobustness(qSig->GetRobustness());
779 break;
783 status.progress.clear();
785 // XXX. not an operation
786 status.major_operation = "Robustness checking complete.";
789 int CSignatureCollection::GetTotalNumberOfWords()
791 int Total = 0;
792 for (int signo = 0; signo < (int)GetCount(); signo++)
794 Total += GetAt(signo)->GetNumberOfStems() * GetAt(signo)->Size();
796 return Total;
800 int CSignatureCollection::TheseTwoSuffixesShareHowManyStems(CSuffix* pSuffix1, CSuffix* pSuffix2)
802 CSignature * pSig;
803 int count = 0;
804 for (int signo = 0; signo < (int)GetCount(); signo++)
806 pSig= GetAt(signo);
807 if ( pSig->Contains (pSuffix1) && pSig->Contains (pSuffix2) )
809 count+= pSig->GetNumberOfStems();
812 return count;
815 void CSignatureCollection::CleanUp()
817 CSignature* pSig;
819 for (int signo = 0; signo < (int) GetCount(); signo++)
821 pSig = GetAt(signo);
822 if ( pSig->GetNumberOfStems() <= 0 || pSig->GetCorpusCount() <= 0 ) // -cs- 20040906 : added the second argument
824 // -cs- 20040602 : DeleteMarkedMembers wasn't actually finding any of the
825 // members to be to be deleted, so I changed it to remove them automatically,
826 // this fixed our word display bug (words weren't connected to their signature
827 RemoveMember(pSig);
832 void CSignatureCollection::AddPointer( CSignature* pSignature )
834 TCollection<CSignature>::AddPointer( pSignature );
836 if( m_pLexicon )
838 switch( m_SignatureType )
840 case STEM_INITIAL:
841 case WORD_INITIAL:
842 m_pLexicon->InsertPrefixSig( pSignature );
843 break;
844 case WORD_FINAL:
845 case STEM_FINAL:
846 default:
847 m_pLexicon->InsertSuffixSig( pSignature );
853 CSignature* CSignatureCollection::AddToCollection( CParse& Signature )
855 CSignature* pSignature = TCollection<CSignature>::AddToCollection( Signature );
857 if( m_pLexicon )
859 switch( m_SignatureType )
861 case STEM_INITIAL:
862 case WORD_INITIAL:
863 m_pLexicon->InsertPrefixSig( pSignature );
864 break;
865 case WORD_FINAL:
866 case STEM_FINAL:
867 default:
868 m_pLexicon->InsertSuffixSig( pSignature );
872 return pSignature;
876 CSignature* CSignatureCollection::AddToCollection( CStringSurrogate& Signature )
878 CSignature* pSignature = TCollection<CSignature>::AddToCollection( Signature );
880 if( m_pLexicon )
882 switch( m_SignatureType )
884 case STEM_INITIAL:
885 case WORD_INITIAL:
886 m_pLexicon->InsertPrefixSig( pSignature );
887 break;
888 case WORD_FINAL:
889 case STEM_FINAL:
890 default:
891 m_pLexicon->InsertSuffixSig( pSignature );
895 return pSignature;
899 void CSignatureCollection::Empty()
901 if( m_pLexicon )
903 CSignature* pSignature;
905 for( int signo = 0; signo < GetCount(); signo++ )
907 pSignature = GetAt(signo);
909 switch( m_SignatureType )
911 case STEM_INITIAL:
912 case WORD_INITIAL:
913 Q_ASSERT( m_pLexicon->RemovePrefixSig( pSignature ) );
914 break;
915 case WORD_FINAL:
916 case STEM_FINAL:
917 default:
918 Q_ASSERT( m_pLexicon->RemoveSuffixSig( pSignature ) );
923 TCollection<CSignature>::Empty();
927 void CSignatureCollection::RemoveAll()
929 if( m_pLexicon )
931 CSignature* pSignature;
933 for( int signo = 0; signo < GetCount(); signo++ )
935 pSignature = GetAt(signo);
937 switch( m_SignatureType )
939 case STEM_INITIAL:
940 case WORD_INITIAL:
941 Q_ASSERT( m_pLexicon->RemovePrefixSig( pSignature ) );
942 break;
943 case WORD_FINAL:
944 case STEM_FINAL:
945 default:
946 Q_ASSERT( m_pLexicon->RemoveSuffixSig( pSignature ) );
951 TCollection<CSignature>::RemoveAll();
955 bool CSignatureCollection::Remove( CSignature* pSignature )
958 if( m_pLexicon )
960 switch( m_SignatureType )
962 case STEM_INITIAL:
963 case WORD_INITIAL:
964 Q_ASSERT( m_pLexicon->RemovePrefixSig( pSignature ) );
965 break;
966 case WORD_FINAL:
967 case STEM_FINAL:
968 default:
969 Q_ASSERT( m_pLexicon->RemoveSuffixSig( pSignature ) );
970 //int dummy;
974 return TCollection<CSignature>::Remove( pSignature );
978 bool CSignatureCollection::RemoveMember( CSignature* pSignature )
981 if( m_pLexicon )
983 switch( m_SignatureType )
985 case STEM_INITIAL:
986 case WORD_INITIAL:
987 Q_ASSERT( m_pLexicon->RemovePrefixSig( pSignature ) );
988 break;
989 case WORD_FINAL:
990 case STEM_FINAL:
991 default:
992 Q_ASSERT( m_pLexicon->RemoveSuffixSig( pSignature ) );
996 return TCollection<CSignature>::RemoveMember( pSignature );
1000 bool CSignatureCollection::RemoveMember( CStringSurrogate& Signature )
1002 CSignature* pSignature = (CSignature*)Find1( Signature )->Get_T_Pointer();
1004 if( m_pLexicon )
1006 switch( m_SignatureType )
1008 case STEM_INITIAL:
1009 case WORD_INITIAL:
1010 m_pLexicon->RemovePrefixSig( pSignature ) ;
1011 break;
1012 case WORD_FINAL:
1013 case STEM_FINAL:
1014 default:
1015 Q_ASSERT( m_pLexicon->RemoveSuffixSig( pSignature ) );
1019 return TCollection<CSignature>::RemoveMember( Signature );
1023 bool CSignatureCollection::RemoveMember( CStringSurrogate& Signature, bool b )
1025 CSignature* pSignature = (CSignature*)Find1( Signature )->Get_T_Pointer();
1027 if( m_pLexicon )
1029 switch( m_SignatureType )
1031 case STEM_INITIAL:
1032 case WORD_INITIAL:
1033 m_pLexicon->RemovePrefixSig( pSignature ) ;
1034 break;
1035 case WORD_FINAL:
1036 case STEM_FINAL:
1037 default:
1038 m_pLexicon->RemoveSuffixSig( pSignature ) ;
1042 return TCollection<CSignature>::RemoveMember( Signature, b );
1046 void CSignatureCollection::DeleteMarkedMembers()
1048 if ( m_DeletionArray == NULL ) { return; }
1050 int count = GetCount();
1051 for (int signo = 0; signo < count; signo++)
1053 if ( m_DeletionArray[signo] == 1 )
1055 if( m_pLexicon )
1057 switch( m_SignatureType )
1059 case STEM_INITIAL:
1060 case WORD_INITIAL:
1061 m_pLexicon->RemovePrefixSig( m_PointerArray[signo] );
1062 break;
1063 case WORD_FINAL:
1064 case STEM_FINAL:
1065 default:
1066 m_pLexicon->RemoveSuffixSig( m_PointerArray[signo] );
1072 TCollection<CSignature>::DeleteMarkedMembers();
1076 void CSignatureCollection::GetIndividualCountsForEachStem ()
1078 CSignature* pSig;
1079 for (int signo = 0; signo < GetCount(); signo++)
1081 pSig = GetAt(signo);
1086 double CSignatureCollection::ComputeDLofInternalPointersOfEachMember(
1087 enum eMDL_STYLE /*unused*/)
1089 m_SumOfDLofPointersInternalToEachMember = 0;
1090 for (int signo = 0; signo < GetCount(); ++signo)
1092 CSignature* sig = GetAt(signo);
1093 m_SumOfDLofPointersInternalToEachMember +=
1094 sig->GetSumOfDLofInternalPointers();
1096 return m_SumOfDLofPointersInternalToEachMember;
1099 // MDL JG August 2006
1100 double CSignatureCollection::ComputeLengthOfPointersToEachOfMyMembers (eMDL_STYLE style )
1102 double Denominator = 0;
1103 double ptr;
1104 int m_DLofPointersToMyMembers = 0;
1107 if (style == CorpusCount )
1109 for (int signo = 0; signo < GetCount(); signo++)
1111 Denominator += GetAt(signo)->GetCorpusCount();
1113 for (int signo = 0; signo < GetCount(); signo++)
1115 ptr = base2log ( Denominator/ GetAt(signo)->GetCorpusCount() );
1116 GetAt(signo)->SetLengthOfPointerToMe (ptr ) ;
1117 m_DLofPointersToMyMembers += ptr;
1121 else if (style == GrammarCount )
1123 for (int signo = 0; signo < GetCount(); signo++)
1125 Denominator += GetAt(signo)->GetNumberOfStems() * GetAt(signo)->GetNumberOfAffixes();
1127 for (int signo = 0; signo < GetCount(); signo++)
1129 ptr = base2log ( Denominator/ GetAt(signo)->GetCorpusCount() );
1130 GetAt(signo)->SetLengthOfPointerToMe (ptr ) ;
1131 m_DLofPointersToMyMembers += ptr;
1134 return m_DLofPointersToMyMembers;
1136 ///----------------------------------------------------------->>>>>
1137 ///----------------------------------------------------------->>>>>
1138 // Allomorphy
1139 ///----------------------------------------------------------->>>>>
1140 void CSignatureCollection::FindAllomorphy()
1142 SignatureAlignment* pSigAlignment;
1144 GetMiniLexicon()->LogFileLargeTitle("Allomorphy");
1145 CSignature *pSig, *qSig=NULL;
1146 int MinimumNumberOfStems = 15;
1147 for (int signo =0; signo < GetCount(); signo++)
1149 pSig = GetAtSort(signo);
1150 if (pSig->GetNumberOfStems() < MinimumNumberOfStems ) {continue;}
1151 if (pSig->Size() < 2 ) {continue;}
1153 for (int signo2 = signo+1; signo2 < GetCount(); signo2++)
1155 qSig = GetAtSort(signo2);
1157 if (qSig->GetNumberOfStems() < MinimumNumberOfStems ) {continue;}
1158 if (qSig->Size() < 2 ) {continue;}
1160 pSigAlignment = new SignatureAlignment (pSig, qSig);
1161 pSigAlignment->FindBestAlignment();
1162 if (GetMiniLexicon()->LogFileOn()
1163 && pSigAlignment->GetAffixAlignments()->count() > 1)
1164 { pSigAlignment->Display( *GetMiniLexicon()->GetLogFile()); }