CMiniLexicon::FindMajorSignatures(): use log file routines
[linguistica.git] / Template.cpp
blob32784cdad9dcd2c2c8907eabf32965d1db979edd
1 // Implementation of CTemplate methods
2 // Copyright © 2009 The University of Chicago
3 #include "Template.h"
5 #include <QMessageBox>
6 #include <Q3TextStream>
7 #include <QIODevice>
8 #include <QFile>
9 #include "Lexicon.h"
10 #include "Alignment.h"
11 #include "TemplateCollection.h"
12 #include "WordCollection.h"
13 #include "generaldefinitions.h"
14 #include "Parse.h"
15 #include "log2.h"
17 extern double g_Lambda ;
19 CTemplate::CTemplate(int NumberOfColumns)
21 m_NumberOfColumns = NumberOfColumns;
22 m_Columns = new CParse*[ m_NumberOfColumns ];
24 for (int i = 0; i < NumberOfColumns; i++)
26 m_Columns[i] = new CParse();
28 m_Complexity = 0;
29 m_WordsTotalComplexity = 0;
30 m_TemplateNumber = 0;
32 m_IsDeleted = false;
33 m_ModifiedColumn = -1;
34 m_IsNewAfterCollapse1 = false;
35 m_StemColumnInCollapse1 =-1;
36 m_SwitchOfSortingValue = false;
40 CTemplate::CTemplate(CTemplate& Template) : CStem ( Template.Display() )
43 m_NumberOfColumns = Template.m_NumberOfColumns;
44 m_Columns = new CParse* [ m_NumberOfColumns ];
45 for (int i = 0; i < m_NumberOfColumns; i++)
48 m_Columns[i] = new CParse ( *Template.m_Columns[i]);
49 Q_ASSERT ( m_Columns[i]->Size() > 0 );
52 m_WordsTotalComplexity = Template.GetWordsTotalComplexity();
53 m_Complexity = Template.GetComplexity();
54 m_TemplateNumber = Template.m_TemplateNumber;
56 m_IsDeleted = Template.m_IsDeleted;
57 m_ModifiedColumn = Template.m_ModifiedColumn;
58 m_IsNewAfterCollapse1 = Template.m_IsNewAfterCollapse1;
59 m_StemColumnInCollapse1 = Template.m_StemColumnInCollapse1;
60 m_SwitchOfSortingValue = Template.m_SwitchOfSortingValue;
64 CTemplate::CTemplate(CAlignment* pAlign) : CStem ( pAlign->SpellOut() )
67 int loc1=0, loc2=0;
68 int StartLoc1, StartLoc2;
69 int col = 0;
71 if ( pAlign->m_Slips != 1 ) { return; }
73 QString debugstring1, debugstring2;
74 const char* CCDebugString1, *CCDebugString2;
76 debugstring1 = pAlign ->m_Str1 ->GetKey().Display();
77 CCDebugString1 = debugstring1.ascii();
78 debugstring2 = pAlign ->m_Str2 ->GetKey().Display();
79 CCDebugString2 = debugstring2.ascii();
82 m_NumberOfColumns = pAlign->m_Spans;
83 m_Columns = new CParse*[ m_NumberOfColumns ];
85 for (int c = 0; c < m_NumberOfColumns; c++)
87 m_Columns[c] = new CParse();
91 // it always does this loop at least once, because of initial "#" which is shared
92 while ( pAlign->PerfectMatch (loc1, loc2) )
94 loc1++;
95 loc2++;
98 if ( loc1 > 1 )
99 { // there is an initial shared span...
100 m_Columns[col]->Append( CStringSurrogate (pAlign->m_Str1->GetKeyPointer(),0 , loc1 ) );
101 col++;
102 StartLoc1 = loc1;
103 StartLoc2 = loc2;
105 else // there is no initial shared span, and we'll back up to put the # in each piece
107 StartLoc1 = loc1 - 1;
108 StartLoc2 = loc2 - 1;
113 while ( loc1 < pAlign->m_Length1 &&
114 pAlign->m_Str2->GetChar( pAlign->m_Match1[loc1] ) != pAlign->m_Str1->GetChar( loc1 )
117 loc1++;
120 while ( loc2 < pAlign->m_Length2 &&
121 pAlign->m_Str1->GetChar( pAlign->m_Match2[loc2] ) != pAlign->m_Str2->GetChar(loc2) )
123 loc2++;
127 CStringSurrogate Piece1 ( pAlign->m_Str1->GetKeyPointer(), StartLoc1 , loc1 - StartLoc1 );
128 CStringSurrogate Piece2 ( pAlign->m_Str2->GetKeyPointer(), StartLoc2 , loc2 - StartLoc2 );
131 if ( Piece1.GetLength () == 0 )
133 // m_Columns[col]->AppendInAlphabeticalOrder( CStringSurrogate(QString("NULL").unicode(), 0, 4));
134 m_Columns[col]->Append(CStringSurrogate(QString("NULL").unicode(), 0, 4));
136 else
138 m_Columns[col]->Append(Piece1);
139 // m_Columns[col]->AppendInAlphabeticalOrder( Piece1, true);
143 if ( Piece2.GetLength () == 0 )
145 // m_Columns[col]->AppendInAlphabeticalOrder( CStringSurrogate(QString("NULL").unicode(), 0, 4), true );
146 m_Columns[col]->Append(
147 CStringSurrogate(QString("NULL").unicode(), 0, 4));
149 else
151 // m_Columns[col]->AppendInAlphabeticalOrder( Piece2, true);
152 m_Columns[col]->Append(Piece2);
155 // yuhuask when slip == 1 Only have three cases: yn or yny
156 if ( col + 1 < m_NumberOfColumns )
158 col++;
159 // m_Columns[col]->AppendInAlphabeticalOrder( CStringSurrogate (pAlign->m_Str1->GetKeyPointer(), loc1,pAlign->m_Str1->GetKeyLength() - loc1 ), true );
160 m_Columns[col]->Append(CStringSurrogate (pAlign->m_Str1->GetKeyPointer(), loc1,pAlign->m_Str1->GetKeyLength() - loc1));
163 for (int i = 1; i < m_NumberOfColumns; i++)
165 Q_ASSERT(m_Columns[i]->Size() > 0 );
169 Q_ASSERT (m_Columns[0]->GetChar(0) == '#');
170 m_TemplateNumber = 0;
173 m_ModifiedColumn = -1;
174 m_IsDeleted = false;
175 m_IsNewAfterCollapse1 = false;
176 m_StemColumnInCollapse1 =-1;
177 m_SwitchOfSortingValue = false;
178 return ;
// Rebuilds this template from an alignment. The existing column objects are
// deleted first; then, only when pAlign->m_Slips == 1, pAlign->m_Spans new
// columns are allocated and filled: a leading shared span (if any), the two
// differing pieces (with "NULL" standing in for an empty piece), and the
// remainder of the first string.
// NOTE(review): brace-only lines are not visible in this view, so the exact
// extent of each block below should be confirmed against the real file.
183 void CTemplate::operator= (CAlignment* pAlign)
186 int loc1= 0, loc2=0;
187 int StartLoc1, StartLoc2;
188 int col = 0;
// Free the column objects currently owned by this template.
// NOTE(review): the m_Columns array itself is not released in the visible
// lines before being reassigned further down — looks like a leak; confirm.
190 if ( m_NumberOfColumns )
192 for (int i = 0; i < m_NumberOfColumns; i++)
194 if ( m_Columns[i] ) delete m_Columns[i];
// NOTE(review): on this early return m_NumberOfColumns is unchanged and
// m_Columns still holds the pointers deleted above, so the destructor would
// presumably delete them a second time — confirm.
198 if ( pAlign->m_Slips != 1 ) { return; }
200 m_NumberOfColumns = pAlign->m_Spans;
202 m_Columns = new CParse*[ m_NumberOfColumns ];
204 for (int c = 0; c < m_NumberOfColumns; c++)
206 m_Columns[c] = new CParse();
// Skip over the characters both strings share at the start.
// NOTE(review): unlike the CAlignment constructor, which uses
// pAlign->PerfectMatch(), this loop has no visible length bound — confirm
// that GetChar() makes it terminate when one string is a prefix of the other.
210 while ( pAlign->m_Str2->GetChar( loc2 ) == pAlign->m_Str1->GetChar( loc1 ) )
212 loc1++;
213 loc2++;
216 if ( loc1 > 1 ) // they agree at the beginning, up to loc1 - 1;
218 m_Columns[col]->Append( CStringSurrogate (pAlign->m_Str1->GetKeyPointer(), 0, loc1) ); // yuhuask should be loc1 not loc1 -1
// NOTE(review): several lines are missing between the Append above and the
// statements below, so it is unclear whether these three statements belong
// to the `if ( loc1 > 1 )` block. If they do, StartLoc1/StartLoc2 are read
// uninitialised when loc1 <= 1 — confirm.
225 col++;
226 StartLoc1 = loc1;
227 StartLoc2 = loc2;
// Advance over the part of each string that differs from its match in the
// other string.
229 while ( loc1 < pAlign->m_Length1 &&
230 pAlign->m_Str2->GetChar( pAlign->m_Match1[loc1] ) != pAlign->m_Str1->GetChar( loc1 )
233 loc1++;
236 while ( loc2 < pAlign->m_Length2 &&
237 pAlign->m_Str1->GetChar( pAlign->m_Match2[loc2] ) != pAlign->m_Str2->GetChar( loc2 ) )
239 loc2++;
243 CStringSurrogate Piece1 ( pAlign->m_Str1->GetKeyPointer(), StartLoc1, loc1 - StartLoc1 ); // yuhuask same problem StartLoc1 - 1 ?
244 CStringSurrogate Piece2 ( pAlign->m_Str2->GetKeyPointer(), StartLoc2, loc2 - StartLoc2 );
// An empty differing piece is recorded as the literal "NULL".
248 if ( Piece1.GetLength () == 0 )
250 m_Columns[col]->Append( CStringSurrogate(QString("NULL").unicode(), 0, 4) );
252 else
254 m_Columns[col]->Append( Piece1);
258 if ( Piece2.GetLength () == 0 )
260 m_Columns[col]->Append( CStringSurrogate(QString("NULL").unicode(), 0, 4) );
262 else
264 m_Columns[col]->Append( Piece2);
// Trailing column: remainder of the first string.
// NOTE(review): no visible check that col stays below m_NumberOfColumns here
// (the CAlignment constructor tests `col + 1 < m_NumberOfColumns`) — confirm.
267 col++;
269 m_Columns[col]->Append( CStringSurrogate (pAlign->m_Str1->GetKeyPointer(), loc1, pAlign->m_Str1->GetKeyLength() - loc1 ) );
271 m_TemplateNumber = 0;
273 return ;
279 CTemplate::~CTemplate(void)
282 for (int i = 0; i < m_NumberOfColumns; i++)
284 delete m_Columns[i];
286 delete m_Columns;
290 //------------------------------------------------------
292 CParse* CTemplate::GetColumn(int n )
295 Q_ASSERT ( n >= 0 );
298 if ( n < m_NumberOfColumns && n >= 0 )
300 return m_Columns[n];
302 return NULL;
306 //------------------------------------------------------
313 QString CTemplate::Display()
315 QString Return;
317 for (int i = 0; i < m_NumberOfColumns; i++)
319 Return += m_Columns[i]->Display() + "_";
322 return Return;
325 /* From JG's original:
327 void CTemplate::Display(CListCtrl& List, int& LineNumber)
329 int NumberOfRows = 0;
330 for (int col = 0; col < m_NumberOfColumns; col++)
332 if (m_Columns[col]->Size() > NumberOfRows ) { NumberOfRows = m_Columns[col]->Size(); }
335 for (int row = 0; row < NumberOfRows+1; row++)
337 List.InsertItem (LineNumber + row, CString ("") );
340 List.SetItemText ( LineNumber, 0, IntToString( (int) GetSortingQuantity() ) );
345 for ( row = 0; row < NumberOfRows; row++)
347 for (int col = 0; col < m_NumberOfColumns; col++)
349 if ( m_Columns[col]->Size() > row )
351 List.SetItemText(LineNumber+row, col + 1, m_Columns[col]->GetAtCString(row+1) );
356 LineNumber += row;
363 QString CTemplate::DisplayFirstLine()
365 QString Return;
367 for (int i = 0; i < m_NumberOfColumns; i++)
369 Return += m_Columns[i]->GetPiece(1).Display() + "_";
372 return Return;
// Writes the template into a list control: one row per piece index, with the
// sorting quantity in column 0 of the first row and column c of the template
// in list column c + 1. LineNumber is advanced by the number of rows written.
// NOTE(review): this overload uses CListCtrl, CString and IntToString, and
// reads `row` after the for statement that declares it; it also repeats the
// "From JG's original" listing commented out earlier in the file. Presumably
// it is likewise inside a comment block whose delimiter lines are not visible
// in this view — confirm before relying on it.
376 void CTemplate::Display(CListCtrl& List, int& LineNumber)
379 int NumberOfRows = 0;
// Find the tallest column.
380 for (int col = 0; col < m_NumberOfColumns; col++)
382 if (m_Columns[col]->Size() > NumberOfRows ) { NumberOfRows = m_Columns[col]->Size(); }
// Insert NumberOfRows + 1 empty rows starting at LineNumber.
385 for (int row = 0; row < NumberOfRows+1; row++)
387 List.InsertItem (LineNumber + row, CString ("") );
390 // if ( GetSortingQuantity() > 15000 || GetSortingQuantity() < 0 )
391 // {
392 // ofstream out ("c:\\4_tests\\IntToString.txt", ios::app );
393 // out << endl << endl; OutputForFile (out);
394 // out << endl << GetSortingQuantity();
395 // out << " " << IntToString ( (int) GetSortingQuantity () );
396 // }
397 List.SetItemText ( LineNumber, 0, IntToString( (int) GetSortingQuantity() ) );
// Fill the cells; pieces are 1-based, hence row+1.
399 for ( row = 0; row < NumberOfRows; row++)
401 for (int col = 0; col < m_NumberOfColumns; col++)
403 if ( m_Columns[col]->Size() > row )
405 List.SetItemText(LineNumber+row, col + 1, m_Columns[col]->GetAtCString(row+1) );
410 LineNumber += row;
416 void CTemplate::ListDisplay(Q3ListView* List)
418 GetSortingQuantity();
419 static_cast<void>(new CTemplateListViewItem(List, this));
422 int CTemplate::GetVerticalColumn()
425 for (int i = 0; i < m_NumberOfColumns; i++)
427 if ( m_Columns[i]->Size() > 1 ) return i;
430 return -1;
432 void CTemplate::AddToColumn(CParse& Morphemes, int n)
435 for (int i = 1; i <= Morphemes.Size(); i++)
437 // m_Columns[n]->AppendInAlphabeticalOrder ( Morphemes.GetPiece(i), true );
438 m_Columns[n]->Append(Morphemes.GetPiece(i));
442 void CTemplate::AddToColumn(CStringSurrogate& SS, int n)
444 // m_Columns[n]->AppendInAlphabeticalOrder ( SS, true );
445 m_Columns[n]->Append(SS);
452 float CTemplate::GetSortingQuantity() const
456 { ComputeComplexity(); }
458 { ComputeWordsTotalComplexity(); }
461 Q_ASSERT ( m_WordsTotalComplexity - m_Complexity < 55000);
464 m_CurrentSortComplexity = m_WordsTotalComplexity - m_Complexity;
466 // I set up a switch flag to indicate that we switch the sorting value
467 if ( m_SwitchOfSortingValue )
469 int NumberOfStems =0;
470 int i;
471 CParse* OneColumn;
475 // Roughly get the number of stems
476 for ( i = 0; i < m_NumberOfColumns; i++)
478 OneColumn = m_Columns[i];
479 if ( OneColumn ->Size() > NumberOfStems)
481 NumberOfStems = OneColumn ->Size();
486 m_NewSortComplexity = (m_WordsTotalComplexity - m_Complexity)/(float)NumberOfStems;
488 else
490 m_NewSortComplexity = m_WordsTotalComplexity - m_Complexity;
495 return float ( m_NewSortComplexity);
496 //return float ( m_WordsTotalComplexity - m_Complexity );
504 float CTemplate::GetSortingQuantity()
507 // if ( m_Complexity <= 0 )
508 { ComputeComplexity(); }
510 // if ( m_WordsTotalComplexity <= 0 )
511 { ComputeWordsTotalComplexity(); }
513 // ofstream out ("c:\\4_tests\\TemplateDisplay.txt", ios::app );
515 Q_ASSERT ( m_WordsTotalComplexity - m_Complexity < 55000);
518 // if ( m_WordsTotalComplexity < 0 || m_WordsTotalComplexity > 15000)
519 // {
520 // OutputForFile(out);
521 // }
522 // if ( m_Complexity < 0 || m_Complexity > 15000)
523 // {
524 // OutputForFile(out);
525 // }
527 m_SortingComplexity = m_WordsTotalComplexity - m_Complexity;
529 return float ( m_WordsTotalComplexity - m_Complexity );
535 void CTemplate::AddAlignment(CAlignment* pAlignment)
537 struct not_implemented { };
538 throw not_implemented();
539 static_cast<void>(pAlignment);
541 // m_Alignments.AddTail( pAlignment);
542 // yuhuask ?
545 bool CTemplate::ShouldConflate ( CTemplate* pAlignment, int& ConflateColumn )
548 /* Works only with templates of 2 or 3 columns.
549 If two alignments share a column, they are collapsed. */
552 if ( m_NumberOfColumns != pAlignment->m_NumberOfColumns ) { return FALSE; }
553 if ( m_NumberOfColumns == 2 )
555 if (// m_Columns[0]->Size() > 1 &&
556 *m_Columns[0] == pAlignment->m_Columns[0] )
558 ConflateColumn = 1;
559 return TRUE;
561 else if (// m_Columns[1]->Size() > 1 &&
562 *m_Columns[1] == pAlignment->m_Columns[1] )
564 ConflateColumn = 0;
565 return TRUE;
567 else
569 return FALSE;
571 } // end of Size = 2;
572 else if ( m_NumberOfColumns == 3 )
574 if ( //m_Columns[0]->Size() > 1 &&
575 *m_Columns[0] == pAlignment->m_Columns[0] &&
576 //m_Columns[2]->Size() > 1 &&
577 *m_Columns[2] == pAlignment->m_Columns[2]
580 ConflateColumn = 1;
581 return TRUE;
583 else if (// m_Columns[1]->Size() > 1 &&
584 *m_Columns[1] == pAlignment->m_Columns[1] &&
585 // m_Columns[2]->Size() > 1 &&
586 *m_Columns[2] == pAlignment->m_Columns[2] )
588 ConflateColumn = 0;
589 return TRUE;
591 else if (// m_Columns[0]->Size() > 1 &&
592 *m_Columns[0] == pAlignment->m_Columns[0] &&
593 // m_Columns[1]->Size() > 1 &&
594 *m_Columns[1] == pAlignment->m_Columns[1] )
596 ConflateColumn = 2;
597 return TRUE;
599 else
601 return FALSE;
604 } // end of Size = 3;
606 return FALSE;
611 void CTemplate::ConflateWith ( CTemplate* pOther, int Column )
613 CParse& their_column = *pOther->m_Columns[Column];
614 CParse& my_column = *m_Columns[Column];
616 for (int i = 1; i <= their_column.Size(); ++i)
617 my_column.Append(their_column.GetPiece(i));
// Tries to shift shared material between adjacent columns when doing so
// lowers the summed ComputeComplexity() of the two columns involved.
//
// Pass 1 (leftward): for each column i >= 1, the maximal majority prefix of
// its pieces is looked up; the candidate moves that prefix from the pieces of
// column i onto the end of every piece of column i-1.
// Pass 2 (rightward): for each column i >= 1, the maximal majority suffix of
// column i-1 is found (by reversing the pieces and reusing the prefix search);
// the candidate moves that suffix onto the front of every piece of column i.
//
// When a candidate is cheaper it replaces the two columns. Pieces that do not
// carry the affix are placed in a copy of this template that is handed to
// TempTemplates->AddTemplate().
//
// NOTE(review): `Return` is initialised to FALSE and never reassigned in the
// visible lines, so the function appears to return FALSE always — confirm.
// NOTE(review): brace-only lines are not visible in this view; the nesting
// described in the comments below is inferred and should be confirmed.
620 bool CTemplate::Readjust(CTemplateCollection* TempTemplates, int MinCount )
622 // bool AllSame = TRUE;
623 bool Return = FALSE;
624 CWordCollection Words;
628 ComputeWordsTotalComplexity();
630 int i;
// ---- Pass 1: move a majority prefix of column i leftward into column i-1.
631 for (i = 1; i < m_NumberOfColumns; i++)
// Columns with fewer than MinCount pieces are not considered.
633 if ( (int) m_Columns[i]->Size() < MinCount )
635 continue;
// Collect the pieces of column i (1-based) and find their majority prefix.
638 Words.RemoveAll();
639 for (int w = 1; w <= (int) m_Columns[i]->Size(); w++)
641 Words << m_Columns[i]->GetPiece(w).Display();
643 CStringSurrogate ssPrefix = Words.FindMaximalMajorityPrefix();
644 if ( ssPrefix.GetLength() == 0 ) { continue; }
646 //------------------------------------------
647 // if we have some material that might potentially be
648 // moved leftward, let's calculate the complexity in that
649 // new situation.
651 //------------------------------------------
// Work on copies: RightColumn keeps the pieces that begin with the prefix
// (prefix stripped), RightColumnOtherMorphemes keeps the rest, and LeftColumn
// gets the prefix appended to every piece.
654 CParse RightColumn = *m_Columns[ i ];
655 CParse RightColumnOtherMorphemes = RightColumn;
656 CParse LeftColumn = *m_Columns[i-1];
658 RightColumn .RemovePiecesThatDoNotBegin ( ssPrefix );
659 RightColumn .RemovePrefixFromAllPieces ( ssPrefix );
660 RightColumnOtherMorphemes
661 .RemovePiecesThatBegin( ssPrefix );
663 LeftColumn .SuffixToAllPieces2 ( ssPrefix );
665 double OldComplexity = m_Columns[i]->ComputeComplexity() +
666 m_Columns[i-1]->ComputeComplexity();
667 double NewComplexity = RightColumn.ComputeComplexity() +
668 LeftColumn.ComputeComplexity();
// Left-over pieces would need their own template; its two columns are
// charged to the new arrangement.
670 if (RightColumnOtherMorphemes.Size() > 0 )
672 NewComplexity += RightColumnOtherMorphemes.ComputeComplexity() +
673 m_Columns[i-1]->ComputeComplexity();
678 if ( NewComplexity < OldComplexity )
680 if ( RightColumnOtherMorphemes.Size() > 0 )
// NOTE(review): TempTemplate is a local object and its address is passed to
// AddTemplate(); this is only safe if AddTemplate() copies it — confirm.
682 CTemplate TempTemplate ( *this );
683 *TempTemplate.GetColumn( i-1 ) = *m_Columns[ i-1 ];
684 *TempTemplate.GetColumn( i ) = RightColumnOtherMorphemes;
686 TempTemplates->AddTemplate ( &TempTemplate);
// Commit the cheaper arrangement.
689 *m_Columns[i] = RightColumn;
690 *m_Columns[i-1] = LeftColumn;
696 CStringSurrogate TempSS;
// ---- Pass 2: move a majority suffix of column i-1 rightward into column i.
699 for ( i = 1; i < m_NumberOfColumns ; i++)
// NOTE(review): the size test is on column i, while the pieces examined below
// come from column i-1 — confirm this is intended.
702 if ( (int) m_Columns[i]->Size() < MinCount )
704 continue;
// Reverse each piece of column i-1 so the prefix search finds a common suffix.
707 Words.RemoveAll();
708 for (int w = 1; w <= (int) m_Columns[i-1]->Size(); w++)
710 TempSS = m_Columns[i-1]->GetPiece(w);
711 TempSS.SetBackwards();
712 Words << TempSS;
714 TempSS = Words.FindMaximalMajorityPrefix();
715 TempSS.SetBackwards();
716 CStringSurrogate ssSuffix ( TempSS );
718 if ( ssSuffix.GetLength() == 0 ) { continue; }
720 //------------------------------------------
722 // if we have some material that might potentially be
723 // moved rightward, let's calculate the complexity in that
724 // new situation.
726 //------------------------------------------
// Work on copies: LeftColumn keeps the pieces that end with the suffix
// (suffix stripped), LeftColumnOtherMorphemes keeps the rest, and RightColumn
// gets the suffix prepended to every piece.
729 CParse RightColumn = *m_Columns[ i ];
730 CParse LeftColumn = *m_Columns[i-1];
731 CParse LeftColumnOtherMorphemes = LeftColumn;
733 LeftColumn .RemovePiecesThatDoNotEnd ( ssSuffix );
734 LeftColumn .RemoveSuffixFromAllPieces ( ssSuffix );
735 LeftColumnOtherMorphemes
736 .RemovePiecesThatEnd ( ssSuffix );
738 RightColumn .PrefixToAllPieces2 ( ssSuffix );
740 double OldComplexity = m_Columns[i] ->ComputeComplexity() +
741 m_Columns[i-1]->ComputeComplexity();
743 double NewComplexity = RightColumn.ComputeComplexity() +
744 LeftColumn.ComputeComplexity();
746 if (LeftColumnOtherMorphemes.Size() > 0 )
748 NewComplexity += LeftColumnOtherMorphemes.ComputeComplexity() +
749 m_Columns[i]->ComputeComplexity();
754 if ( NewComplexity < OldComplexity )
756 if ( LeftColumnOtherMorphemes.Size() > 0 )
// NOTE(review): same concern as in pass 1 — address of a local is passed on.
758 CTemplate TempTemplate ( *this );
759 *TempTemplate.GetColumn( i-1 ) = LeftColumnOtherMorphemes;
760 *TempTemplate.GetColumn( i ) = *m_Columns[ i ];
762 TempTemplates->AddTemplate ( &TempTemplate);
// Commit the cheaper arrangement.
765 *m_Columns[i] = RightColumn;
766 *m_Columns[i-1] = LeftColumn;
770 continue;
776 return Return;
// Dumps the template to a stream as a table: one output line per piece
// index, one 20-wide field per column ('-' where a column has no piece at that
// row), followed by a "Complexity:" section listing base2log of the column
// count and each column's ComputeComplexity().
// NOTE(review): this function uses ofstream, setw and CString, and the final
// loop reuses `i` from an earlier for-statement's declaration. Presumably the
// whole function is inside a comment block whose delimiter lines are not
// visible in this view — confirm before relying on it.
781 void CTemplate::OutputForFile(ofstream& out )
// MaxSize = size of the tallest column.
783 int MaxSize = (int) m_Columns[0]->Size();
784 CString Space = " ";
785 for ( int i = 1; i < m_NumberOfColumns; i++)
787 if ( (int) m_Columns[i]->Size() > MaxSize )
789 MaxSize = m_Columns[i]->Size();
// One output line per row; pieces are 1-based, hence row+1.
792 for (int row = 0; row < MaxSize; row++)
794 out << endl;
795 for (int col = 0; col < m_NumberOfColumns; col++)
797 out << setw(20);
798 if ( row < (int) m_Columns[col]->Size() )
800 out << m_Columns[col]->GetAt(row+1) << ' ';
802 else
804 out << '-';
808 out << endl << endl << "Complexity: "<< endl;
809 out << "From length: "<< base2log (m_NumberOfColumns) << endl;
810 for ( i = 0; i < m_NumberOfColumns; i++)
812 out << i << " " << m_Columns[i]->ComputeComplexity() << endl;
818 int CTemplate::GetNumberOfLetters()
821 int Total = 0;
823 for (int i = 0; i < m_NumberOfColumns; i++)
825 Total += m_Columns[i]->GetKeyLength();
828 return Total;
831 float CTemplate::ComputeComplexity() const
833 m_Complexity = 0;
834 m_Complexity += float ( base2log (m_NumberOfColumns) +
835 g_Lambda * m_NumberOfColumns * (m_NumberOfColumns - 1 ) / 2 );
836 Q_ASSERT (m_Complexity < 20 );
837 for (int i = 0; i < m_NumberOfColumns; i++)
839 m_Complexity += (float) m_Columns[ i ]->ComputeComplexity();
840 Q_ASSERT ( m_Complexity < 55000 );
842 Q_ASSERT ( m_Complexity > 0 );
843 Q_ASSERT ( m_Complexity < 55000 );
847 return m_Complexity;
850 float CTemplate::ComputeWordsTotalComplexity() const
853 CParse TempWords,
854 NewWords;
855 // first figure out the words:
856 if (m_NumberOfColumns < 1)
858 m_WordsTotalComplexity= 0;
859 //out << " No columns." ;
860 return m_WordsTotalComplexity;
863 CParse Words = *m_Columns[0];
864 for (int col = 1; col < m_NumberOfColumns; col++)
866 NewWords.ClearParse();
867 for (int row = 1; row <= (int) m_Columns[col]->Size(); row++)
869 TempWords = Words;
870 CStringSurrogate ss = m_Columns[col]->GetPiece(row);
871 if (ss == CStringSurrogate(QString("NULL").unicode(), 0, 4) )
873 // TempWords = Words;
874 } else
876 TempWords.SuffixToAllPieces2 ( m_Columns[col]->GetPiece(row) );
878 NewWords.Append (&TempWords);
880 Words = NewWords;
884 m_WordsTotalComplexity = (float) Words.ComputeComplexity();
886 Q_ASSERT (m_WordsTotalComplexity < 50000 );
888 return m_WordsTotalComplexity;
894 float CTemplate:: GetComplexity()
896 if (m_Complexity <= 0)
898 ComputeComplexity();
900 return m_Complexity;
902 float CTemplate:: GetWordsTotalComplexity()
904 if (m_WordsTotalComplexity <= 0)
906 ComputeWordsTotalComplexity();
908 return m_WordsTotalComplexity;
913 void CTemplate::GetWordsAndParses(StringToParse& OneWordAndParse)
915 int CurrentSize ;
916 int Column;
917 int row;
918 StringToParse TempWordsAndParses;
919 StringToParse SwapWordsAndParses;
920 StringToParse::Iterator StringToParseIt;
921 QString TheWord, OneWord, NewWord, DumpWord;
922 CParse* TheParse;
923 CParse* NewParse;
924 bool IsPreNull;
927 CurrentSize = (int) m_Columns[0]->Size();
928 for ( row= 0; row<CurrentSize; row++)
930 TheWord = m_Columns[0]->GetPiece(row+1).Display();
931 //Delete the "#"
932 TheWord = TheWord.right((TheWord.length()-1));
933 if ( TheWord.length() == 0)
935 TheWord = QString("NULL");
938 TheParse = new CParse(CStringSurrogate(TheWord));
939 TempWordsAndParses.insert(TheWord,TheParse);
943 for ( int i = 1; i < m_NumberOfColumns; i++)
945 Column = i;
947 CurrentSize = (int) m_Columns[Column]->Size();
949 for ( StringToParseIt = TempWordsAndParses.begin();StringToParseIt != TempWordsAndParses.end(); StringToParseIt++)
951 TheWord = StringToParseIt.key();
952 TheParse = StringToParseIt.data();
954 // It only works for first col;
955 if (TheWord == QString("NULL"))
957 IsPreNull = true;
959 else
961 IsPreNull = false;
964 for ( row= 0; row<CurrentSize; row++)
966 OneWord = m_Columns[Column]->GetPiece(row+1).Display();
969 if ( OneWord == QString("NULL"))
971 // Impossible We get "NULL" + "NULL"
972 NewWord = TheWord;
973 NewParse = new CParse(*TheParse);
975 else
977 if (! IsPreNull)
979 NewWord = TheWord + OneWord;
980 NewParse = new CParse(*TheParse);
981 NewParse->Append(CStringSurrogate(OneWord));
983 else
985 NewWord = OneWord;
986 NewParse = new CParse();
987 NewParse->Append(CStringSurrogate(OneWord));
992 if (! SwapWordsAndParses.contains(NewWord))
994 SwapWordsAndParses.insert(NewWord,NewParse);
996 else
998 QMessageBox::information( NULL, "debug","Impossible Here. Please Let Yu Hu Debug it!", "OK") ;
1002 delete TheParse;
1006 TempWordsAndParses.clear();
1008 for ( StringToParseIt = SwapWordsAndParses.begin();StringToParseIt != SwapWordsAndParses.end(); StringToParseIt++)
1010 TheWord = StringToParseIt.key();
1011 TheParse = StringToParseIt.data();
1013 TempWordsAndParses.insert(TheWord, TheParse);
1016 SwapWordsAndParses.clear();
1022 for ( StringToParseIt = TempWordsAndParses.begin();StringToParseIt != TempWordsAndParses.end(); StringToParseIt++)
1024 TheWord = StringToParseIt.key();
1025 TheParse = StringToParseIt.data();
1026 OneWordAndParse.insert(TheWord, TheParse);
1035 //////////////////
1037 void CTemplate::AdjustMeByMovingCommonTailOrHead2(StringToFloat& MorphemeAndItsComplexity, int MaximumSizeOfEachColumn, int MaximumNumberOfMovingLetters, bool PrintChangedTemplates,\
1038 int Loopi, bool ShouldUseStickNess, StringToStringToFloat& GlobalStickNess, \
1039 float TotalGlobalStickNess, float TotalWords)
1044 int i, row;
1045 int backletteri, headletteri;
1046 QString TheMorpheme;
1047 bool PassColumnSizeTest = false;
1048 CParse* LeftColumn, *RightColumn, *OneColumn;
1049 bool changed = false;
1050 bool NoFurther;
1051 int MoveWhichColumn;
1052 int MoveL2RorR2L; // 0 --> left to right; 1 --> right to left
1053 int MoveHowmanyLetters = 0;
1054 float OneMorphemeComplexity, AlternativeMorphemeComplexity;
1055 QString CommonTail, CommonHead, MyTail, MyHead;
1056 QString Newmorpheme;
1057 float NewmorphemeComplexity, Diff, BestDiff, BestStickNessDiff;
1058 float StickNessDiff;
1059 float LeftColumnOldStickness, LeftColumnNewStickness;
1060 float RightColumnOldStickness, RightColumnNewStickness;
1061 int Tempi, Tempj;
1062 QString TempMorphemei, TempMorphemej;
1063 StringToFloat* oneCollection;
1064 float oneFloat;
1065 CParse PossibleNewColumn;
1069 //Through each column
1070 for ( i = 0; i < m_NumberOfColumns -1; i++)
1073 if ( i == m_ModifiedColumn)
1075 continue;
1078 LeftColumn = m_Columns[i];
1079 RightColumn = m_Columns[i+1];
1081 if (( LeftColumn ->Size() <= MaximumSizeOfEachColumn) && (RightColumn ->Size() <= MaximumSizeOfEachColumn))
1083 PassColumnSizeTest = true;
1085 else
1087 PassColumnSizeTest = false;
1088 continue;
1092 LeftColumnOldStickness = 0;
1093 LeftColumnNewStickness = 0;
1094 RightColumnOldStickness = 0;
1095 RightColumnNewStickness = 0;
1097 if ( ShouldUseStickNess )
1100 if ( LeftColumn ->Size() != 1)
1103 for ( Tempi = 1; Tempi < LeftColumn ->Size(); Tempi++)
1105 TempMorphemei = LeftColumn ->GetPiece(Tempi).Display();
1107 if ( i == 0)
1109 TempMorphemei = TempMorphemei.right(TempMorphemei.length() -1);
1110 if ( TempMorphemei.length() ==0)
1112 TempMorphemei = QString("NULL");
1118 for ( Tempj = Tempi + 1; Tempj <= LeftColumn ->Size(); Tempj++)
1120 TempMorphemej = LeftColumn ->GetPiece(Tempj).Display();
1122 if ( i == 0)
1124 TempMorphemej = TempMorphemej.right(TempMorphemej.length() -1);
1125 if ( TempMorphemej.length() ==0)
1127 TempMorphemej = QString("NULL");
1133 if ( TempMorphemei > TempMorphemej )
1135 oneCollection = GlobalStickNess[TempMorphemei];
1136 oneFloat = (*oneCollection)[TempMorphemej];
1137 LeftColumnOldStickness += oneFloat;
1139 else
1141 oneCollection = GlobalStickNess[TempMorphemej];
1142 oneFloat = (*oneCollection)[TempMorphemei];
1143 LeftColumnOldStickness += oneFloat;
1155 if ( RightColumn ->Size() != 1)
1158 for ( Tempi = 1; Tempi < RightColumn ->Size(); Tempi++)
1160 TempMorphemei = RightColumn ->GetPiece(Tempi).Display();
1163 for ( Tempj = Tempi +1; Tempj <= RightColumn ->Size(); Tempj++)
1165 TempMorphemej = RightColumn ->GetPiece(Tempj).Display();
1168 if ( TempMorphemei > TempMorphemej )
1170 oneCollection = GlobalStickNess[TempMorphemei];
1171 oneFloat = (*oneCollection)[TempMorphemej];
1172 RightColumnOldStickness += oneFloat;
1174 else
1176 oneCollection = GlobalStickNess[TempMorphemej];
1177 oneFloat = (*oneCollection)[TempMorphemei];
1178 RightColumnOldStickness += oneFloat;
1189 } // ShouldUseStickNess, then get the LeftColumnOldStickness, RightColumnOldStickness
1193 // Check LeftColumn
1195 backletteri = 1;
1196 NoFurther = false;
1198 while ( (backletteri <= MaximumNumberOfMovingLetters))
1201 LeftColumnNewStickness = 0;
1202 RightColumnNewStickness = 0;
1204 // Need experiment on how to deal with "NULL" since "NULL" is too common
1205 if ( (static_cast <int> ( (LeftColumn ->Size() )) == 1) && (LeftColumn->GetPiece(1).Display().length()<=backletteri ))
1207 NoFurther = true;
1208 break;
1212 PossibleNewColumn.ClearParse();
1214 // Check whether all words have backletteri tailletter in common
1215 for ( row =1; row <= LeftColumn ->Size(); row++)
1217 TheMorpheme = LeftColumn->GetPiece(row).Display();
1220 if ( i == 0)
1222 TheMorpheme = TheMorpheme.right(TheMorpheme.length() -1);
1224 if ( TheMorpheme.length() ==0)
1226 TheMorpheme = QString("NULL");
1231 if ( TheMorpheme == QString("NULL"))
1233 NoFurther = true;
1234 break;
1237 if ( static_cast <int> ( TheMorpheme.length() ) < backletteri)
1239 NoFurther = true;
1240 break;
1243 MyTail = TheMorpheme.right(backletteri);
1245 if ( row == 1)
1247 CommonTail = MyTail;
1248 continue;
1251 if ( MyTail != CommonTail)
1253 NoFurther = true;
1254 break;
1258 if ( NoFurther)
1260 break;
1263 // Now Calculate the Alternative Complexity
1264 Diff = 0.0;
1266 for ( row =1; row <= LeftColumn ->Size(); row++)
1268 TheMorpheme = LeftColumn->GetPiece(row).Display();
1270 if ( i == 0)
1272 TheMorpheme = TheMorpheme.right(TheMorpheme.length() -1);
1274 if ( TheMorpheme.length() ==0)
1276 TheMorpheme = QString("NULL");
1281 if (!MorphemeAndItsComplexity.contains(TheMorpheme)) //, OneMorphemeComplexity))
1283 QMessageBox::information(NULL, "Debug", "Can't find this Morpheme", "OK");
1284 return;
1287 OneMorphemeComplexity = MorphemeAndItsComplexity[TheMorpheme];
1289 Newmorpheme = TheMorpheme.left(TheMorpheme.length() - backletteri);
1291 if ( Newmorpheme.length() ==0)
1293 Newmorpheme = QString("NULL");
1296 // Populate the possible Column
1297 PossibleNewColumn.Append(Newmorpheme);
1299 if (!MorphemeAndItsComplexity.contains(Newmorpheme))//, AlternativeMorphemeComplexity))
1301 NewmorphemeComplexity = -base2log(1.0/TotalWords); // Big Penalty
1303 else
1305 AlternativeMorphemeComplexity = MorphemeAndItsComplexity[Newmorpheme];
1306 NewmorphemeComplexity = AlternativeMorphemeComplexity;
1309 Diff += NewmorphemeComplexity - OneMorphemeComplexity;
1315 // Compute the possible Column Stickness
1316 if ( ShouldUseStickNess )
1318 if ( PossibleNewColumn.Size() > 1)
1321 for ( Tempi = 1; Tempi < PossibleNewColumn.Size(); Tempi++)
1323 TempMorphemei = PossibleNewColumn.GetPiece(Tempi).Display();
1325 for ( Tempj = Tempi + 1; Tempj <= PossibleNewColumn.Size(); Tempj++)
1327 TempMorphemej = PossibleNewColumn.GetPiece(Tempj).Display();
1329 if ( TempMorphemei > TempMorphemej )
1331 if ( GlobalStickNess.contains(TempMorphemei))
1333 oneCollection = GlobalStickNess[TempMorphemei];
1335 if ( oneCollection ->contains(TempMorphemej))
1337 oneFloat = (*oneCollection)[TempMorphemej];
1338 LeftColumnNewStickness += oneFloat;
1340 else
1342 LeftColumnNewStickness += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
1347 else
1349 LeftColumnNewStickness += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
1352 else
1354 if ( GlobalStickNess.contains(TempMorphemej))
1356 oneCollection = GlobalStickNess[TempMorphemej];
1357 if ( oneCollection ->contains(TempMorphemei))
1359 oneFloat = (*oneCollection)[TempMorphemei];
1360 LeftColumnNewStickness += oneFloat;
1362 else
1364 LeftColumnNewStickness += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
1369 else
1371 LeftColumnNewStickness += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
1381 else
1383 LeftColumnNewStickness = 0.0;
1388 PossibleNewColumn.ClearParse();
1389 for ( row =1; row <= RightColumn ->Size(); row++)
1391 TheMorpheme = RightColumn->GetPiece(row).Display();
1393 if (!MorphemeAndItsComplexity.contains(TheMorpheme))
1395 QMessageBox::information (NULL, "Debug", "Can't find this Morpheme", "OK");
1396 return;
1399 OneMorphemeComplexity = MorphemeAndItsComplexity[TheMorpheme];
1401 if ( TheMorpheme == QString("NULL"))
1403 Newmorpheme = CommonTail;
1405 else
1407 Newmorpheme = CommonTail + TheMorpheme;
1410 PossibleNewColumn.Append(Newmorpheme);
1412 if (!MorphemeAndItsComplexity.contains(Newmorpheme)) //, ))
1414 NewmorphemeComplexity = -base2log(1.0/TotalWords); // Big Penalty
1416 else
1418 AlternativeMorphemeComplexity = MorphemeAndItsComplexity[Newmorpheme];
1419 NewmorphemeComplexity = AlternativeMorphemeComplexity;
1422 Diff += NewmorphemeComplexity - OneMorphemeComplexity;
1427 // Compute the possible Column Stickness
1428 if ( ShouldUseStickNess )
1430 if ( PossibleNewColumn.Size() > 1)
1433 for ( Tempi = 1; Tempi < PossibleNewColumn.Size(); Tempi++)
1435 TempMorphemei = PossibleNewColumn.GetPiece(Tempi).Display();
1437 for ( Tempj = Tempi + 1; Tempj <= PossibleNewColumn.Size(); Tempj++)
1439 TempMorphemej = PossibleNewColumn.GetPiece(Tempj).Display();
1441 if ( TempMorphemei > TempMorphemej )
1443 if ( GlobalStickNess.contains(TempMorphemei))
1445 oneCollection = GlobalStickNess[TempMorphemei];
1446 if ( oneCollection ->contains(TempMorphemej))
1448 oneFloat = (*oneCollection)[TempMorphemej];
1449 RightColumnNewStickness += oneFloat;
1451 else
1453 RightColumnNewStickness += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
1458 else
1460 RightColumnNewStickness += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
1463 else
1465 if ( GlobalStickNess.contains(TempMorphemej))
1467 oneCollection = GlobalStickNess[TempMorphemej];
1468 if ( oneCollection ->contains(TempMorphemei))
1470 oneFloat = (*oneCollection)[TempMorphemei];
1471 RightColumnNewStickness += oneFloat;
1473 else
1475 RightColumnNewStickness += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
1480 else
1482 RightColumnNewStickness += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
1492 else
1494 RightColumnNewStickness = 0.0;
1501 // Check whether Diff > 0, if so records these changes
1503 if ( ShouldUseStickNess )
1506 StickNessDiff = ( RightColumnNewStickness + LeftColumnNewStickness) - (RightColumnOldStickness + LeftColumnOldStickness) ;
1508 Diff += StickNessDiff;
1513 if (Diff < 0)
1515 if (changed)
1517 if ( Diff < BestDiff)
1519 BestDiff = Diff;
1520 BestStickNessDiff = StickNessDiff;
1521 MoveWhichColumn = i;
1522 MoveL2RorR2L = 0;
1523 MoveHowmanyLetters = backletteri;
1524 // Always remember the left one column
1525 m_ModifiedColumn = i;
1529 else
1531 changed = true;
1532 BestDiff = Diff;
1533 MoveWhichColumn = i;
1534 MoveL2RorR2L = 0;
1535 MoveHowmanyLetters = backletteri;
1536 // Always remember the left one column
1537 m_ModifiedColumn = i;
1542 backletteri++;
1547 // Check RightColumn
1549 headletteri = 1;
1550 NoFurther = false;
1552 while ( (headletteri <= MaximumNumberOfMovingLetters))
1555 LeftColumnNewStickness = 0;
1556 RightColumnNewStickness = 0;
1558 PossibleNewColumn.ClearParse();
1560 // Need experiment on how to deal with "NULL" since "NULL" is too common
1561 if ( (static_cast <int> ((RightColumn ->Size())) == 1) && (RightColumn->GetPiece(1).Display().length()<=headletteri ))
1563 NoFurther = true;
1564 break;
1567 // Check whether all words have headletteri headletter in common
1568 for ( row =1; row <= RightColumn ->Size(); row++)
1570 TheMorpheme = RightColumn->GetPiece(row).Display();
1572 if ( TheMorpheme == QString("NULL"))
1574 NoFurther = true;
1575 break;
1578 if ( static_cast <int> ( TheMorpheme.length() ) < headletteri)
1580 NoFurther = true;
1581 break;
1584 MyHead = TheMorpheme.left(headletteri);
1586 if ( row == 1)
1588 CommonHead = MyHead;
1589 continue;
1592 if ( MyHead != CommonHead)
1594 NoFurther = true;
1595 break;
1599 if ( NoFurther)
1601 break;
1604 // Now Calculate the Alternative Complexity
1605 Diff = 0.0;
1607 for ( row =1; row <= RightColumn ->Size(); row++)
1609 TheMorpheme = RightColumn->GetPiece(row).Display();
1611 if (!MorphemeAndItsComplexity.contains(TheMorpheme))
1613 QMessageBox::information (NULL, "Debug", "Can't find this Morpheme", "OK");
1614 return;
1618 OneMorphemeComplexity = MorphemeAndItsComplexity[TheMorpheme];
1619 Newmorpheme = TheMorpheme.right(TheMorpheme.length() - headletteri);
1621 if ( Newmorpheme.length() ==0)
1623 Newmorpheme = QString("NULL");
1626 PossibleNewColumn.Append(Newmorpheme);
1628 if (!MorphemeAndItsComplexity.contains(Newmorpheme))
1630 NewmorphemeComplexity = -base2log(1.0/TotalWords); // Big Penalty
1632 else
1634 AlternativeMorphemeComplexity = MorphemeAndItsComplexity[Newmorpheme];
1635 NewmorphemeComplexity = AlternativeMorphemeComplexity;
1638 Diff += NewmorphemeComplexity - OneMorphemeComplexity;
1642 // Compute the possible Column Stickness
1643 if ( ShouldUseStickNess )
1645 if ( PossibleNewColumn.Size() > 1)
1648 for ( Tempi = 1; Tempi < PossibleNewColumn.Size(); Tempi++)
1650 TempMorphemei = PossibleNewColumn.GetPiece(Tempi).Display();
1652 for ( Tempj = Tempi + 1; Tempj <= PossibleNewColumn.Size(); Tempj++)
1654 TempMorphemej = PossibleNewColumn.GetPiece(Tempj).Display();
1656 if ( TempMorphemei > TempMorphemej )
1658 if ( GlobalStickNess.contains(TempMorphemei))
1660 oneCollection = GlobalStickNess[TempMorphemei];
1661 if ( oneCollection ->contains(TempMorphemej))
1663 oneFloat = (*oneCollection)[TempMorphemej];
1664 RightColumnNewStickness += oneFloat;
1666 else
1668 RightColumnNewStickness += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
1673 else
1675 RightColumnNewStickness += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
1678 else
1680 if ( GlobalStickNess.contains(TempMorphemej))
1682 oneCollection = GlobalStickNess[TempMorphemej];
1683 if ( oneCollection ->contains(TempMorphemei))
1685 oneFloat = (*oneCollection)[TempMorphemei];
1686 RightColumnNewStickness += oneFloat;
1688 else
1690 RightColumnNewStickness += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
1695 else
1697 RightColumnNewStickness += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
1707 else
1709 RightColumnNewStickness = 0.0;
1713 PossibleNewColumn.ClearParse();
1714 for ( row =1; row <= LeftColumn ->Size(); row++)
1716 TheMorpheme = LeftColumn->GetPiece(row).Display();
1718 if ( i == 0)
1720 TheMorpheme = TheMorpheme.right(TheMorpheme.length() -1);
1721 if ( TheMorpheme.length() ==0)
1723 TheMorpheme = QString("NULL");
1728 if (!MorphemeAndItsComplexity.contains(TheMorpheme))
1730 QMessageBox::information (NULL, "Debug", "Can't find this Morpheme", "OK");
1731 return;
1735 OneMorphemeComplexity = MorphemeAndItsComplexity[TheMorpheme];
1737 if ( TheMorpheme == QString("NULL"))
1739 Newmorpheme = CommonHead;
1741 else
1743 Newmorpheme = TheMorpheme + CommonHead;
1746 PossibleNewColumn.Append(Newmorpheme);
1748 if (!MorphemeAndItsComplexity.contains(Newmorpheme))
1750 NewmorphemeComplexity = -base2log(1.0/TotalWords); // Big Penalty
1752 else
1754 AlternativeMorphemeComplexity = MorphemeAndItsComplexity[Newmorpheme];
1755 NewmorphemeComplexity = AlternativeMorphemeComplexity;
1758 Diff += NewmorphemeComplexity - OneMorphemeComplexity;
1762 // Compute the possible Column Stickness
1763 if ( ShouldUseStickNess )
1765 if ( PossibleNewColumn.Size() > 1)
1768 for ( Tempi = 1; Tempi < PossibleNewColumn.Size(); Tempi++)
1770 TempMorphemei = PossibleNewColumn.GetPiece(Tempi).Display();
1772 for ( Tempj = Tempi + 1; Tempj <= PossibleNewColumn.Size(); Tempj++)
1774 TempMorphemej = PossibleNewColumn.GetPiece(Tempj).Display();
1776 if ( TempMorphemei > TempMorphemej )
1778 if ( GlobalStickNess.contains(TempMorphemei))
1780 oneCollection = GlobalStickNess[TempMorphemei];
1781 if ( oneCollection ->contains(TempMorphemej))
1783 oneFloat = (*oneCollection)[TempMorphemej];
1784 LeftColumnNewStickness += oneFloat;
1786 else
1788 LeftColumnNewStickness += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
1793 else
1795 LeftColumnNewStickness += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
1798 else
1800 if ( GlobalStickNess.contains(TempMorphemej))
1802 oneCollection = GlobalStickNess[TempMorphemej];
1803 if ( oneCollection ->contains(TempMorphemei))
1805 oneFloat = (*oneCollection)[TempMorphemei];
1806 LeftColumnNewStickness += oneFloat;
1808 else
1810 LeftColumnNewStickness += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
1815 else
1817 LeftColumnNewStickness += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
1827 else
1829 LeftColumnNewStickness = 0.0;
1834 // Check whether Diff > 0, if so records these changes
1836 if ( ShouldUseStickNess )
1839 StickNessDiff = ( RightColumnNewStickness + LeftColumnNewStickness) - (RightColumnOldStickness + LeftColumnOldStickness) ;
1841 Diff += StickNessDiff;
1845 if (Diff < 0)
1847 if (changed)
1849 if ( Diff < BestDiff)
1851 BestDiff = Diff;
1852 BestStickNessDiff = StickNessDiff;
1853 MoveWhichColumn = i+1;
1854 MoveL2RorR2L = 1;
1855 MoveHowmanyLetters = headletteri;
1857 // Always remember the left one column
1858 m_ModifiedColumn = i;
1862 else
1864 changed = true;
1865 BestDiff = Diff;
1866 MoveWhichColumn = i+1;
1867 MoveL2RorR2L = 1;
1868 MoveHowmanyLetters = headletteri;
1870 // Always remember the left one column
1871 m_ModifiedColumn = i;
1876 headletteri++;
1882 // Now, we can know whether we adjust this template
1885 // int MaximumOutputMorphemeInOneColumn = 8;
1886 QString DisplayOfOneColumn;
1889 if ( changed )
1891 // Print Original Templates
1892 if (PrintChangedTemplates)
1895 QFile file( "AdjustedTemplates.txt" );
1897 if ( !file.open( QIODevice::WriteOnly | QIODevice::Append ) )
1899 QMessageBox::information(NULL, "Error", "Can't Open the file!", "OK");
1900 return;
1903 Q3TextStream outf( &file );
1905 //outf.open ("AdjustedTemplates.txt", ofstream::out | ofstream::app);
1907 outf << "***********"<<Loopi<<"************" <<endl;
1908 outf << " Original Template:" << endl;
1909 outf << " ";
1911 for ( i = 0; i < m_NumberOfColumns; i++)
1913 OneColumn = m_Columns[i];
1915 DisplayOfOneColumn = QString("{ ");
1916 for (int j = 1; j <= OneColumn ->Size();j++)
1918 DisplayOfOneColumn += OneColumn ->GetPiece(j).Display();
1919 if ( j != OneColumn ->Size())
1921 DisplayOfOneColumn += QString(" , ");
1925 DisplayOfOneColumn += QString(" }");
1926 outf << DisplayOfOneColumn;
1927 if ( i != m_NumberOfColumns -1)
1929 outf << " --> ";
1933 outf << endl;
1935 file.close();
1936 //outf.close();
1940 // Modify this tempalte based on former bestdiff record
1941 CParse TempParse;
1942 QString MovedSubString;
1943 CStringSurrogate TempCSS;
1945 if ( MoveL2RorR2L == 0)
1947 // when letters are moved from left to right
1948 LeftColumn = m_Columns[MoveWhichColumn];
1949 RightColumn = m_Columns[MoveWhichColumn+1];
1950 MovedSubString = LeftColumn ->GetPiece(1).Display().right(MoveHowmanyLetters);
1952 // Modify left column
1953 TempParse.ClearParse();
1954 for ( row =1; row <= LeftColumn ->Size(); row++)
1956 TheMorpheme = LeftColumn->GetPiece(row).Display();
1959 if ( MoveWhichColumn ==0)
1961 TheMorpheme = TheMorpheme.right(TheMorpheme.length() -1);
1963 if ( TheMorpheme.length() ==0)
1965 QMessageBox::information (NULL, "Debug", "Impossible Here!", "OK");
1966 TheMorpheme = QString("NULL");
1972 Newmorpheme = TheMorpheme.left(TheMorpheme.length() - MoveHowmanyLetters);
1974 if ( Newmorpheme.length() ==0)
1976 Newmorpheme = QString("NULL");
1980 TempCSS = CStringSurrogate(Newmorpheme);
1981 if ( !TempParse.Contains(TempCSS))
1983 TempParse.Append(Newmorpheme);
1988 if ( TempParse.Size() ==0)
1990 QMessageBox::information(NULL, "Debug", "TempParse Size should not be 0", "OK");
1991 return;
1994 // If this is the first column, we put "#" back
1995 if ( MoveWhichColumn ==0)
1997 CParse SwapParse;
1998 int Swapi;
1999 QString Swappiece;
2001 for ( Swapi = 1; Swapi <= TempParse.Size(); Swapi++)
2003 Swappiece = TempParse.GetPiece(Swapi).Display();
2005 if ( Swappiece == QString("NULL"))
2007 Swappiece = QString("#");
2009 else
2011 Swappiece = QString("#") + Swappiece;
2014 SwapParse.Append(Swappiece);
2017 TempParse = SwapParse ;
2020 (*LeftColumn) = TempParse;
2023 // Modify right column
2024 TempParse.ClearParse();
2025 for ( row =1; row <= RightColumn ->Size(); row++)
2027 TheMorpheme = RightColumn->GetPiece(row).Display();
2029 if ( TheMorpheme == QString("NULL"))
2031 Newmorpheme = MovedSubString;
2033 else
2035 Newmorpheme = MovedSubString + TheMorpheme;
2038 TempCSS = CStringSurrogate(Newmorpheme);
2039 if ( !TempParse.Contains(TempCSS))
2041 TempParse.Append(Newmorpheme);
2047 if ( TempParse.Size() ==0)
2049 QMessageBox::information(NULL, "Debug", "TempParse Size should not be 0", "OK");
2050 return;
2053 (*RightColumn) = TempParse;
2056 else
2058 // when letters are moved from right to left
2059 LeftColumn = m_Columns[MoveWhichColumn-1];
2060 RightColumn = m_Columns[MoveWhichColumn];
2061 MovedSubString = RightColumn ->GetPiece(1).Display().left(MoveHowmanyLetters);
2063 // Modify right column
2064 TempParse.ClearParse();
2065 for ( row =1; row <= RightColumn ->Size(); row++)
2067 TheMorpheme = RightColumn->GetPiece(row).Display();
2068 Newmorpheme = TheMorpheme.right(TheMorpheme.length() - MoveHowmanyLetters);
2069 if ( Newmorpheme.length() ==0)
2071 Newmorpheme = QString("NULL");
2074 TempCSS = CStringSurrogate(Newmorpheme);
2075 if ( !TempParse.Contains(TempCSS))
2077 TempParse.Append(Newmorpheme);
2082 if ( TempParse.Size() ==0)
2084 QMessageBox::information(NULL, "Debug", "TempParse Size should not be 0", "OK");
2085 return;
2088 (*RightColumn) = TempParse;
2091 // Modify left column
2092 TempParse.ClearParse();
2093 for ( row =1; row <= LeftColumn ->Size(); row++)
2095 TheMorpheme = LeftColumn->GetPiece(row).Display();
2097 if ( MoveWhichColumn == 1)
2099 TheMorpheme = TheMorpheme.right(TheMorpheme.length() -1);
2100 if ( TheMorpheme.length() ==0)
2102 TheMorpheme = QString("NULL");
2107 if ( TheMorpheme == QString("NULL"))
2109 Newmorpheme = MovedSubString;
2111 else
2113 Newmorpheme = TheMorpheme + MovedSubString;
2116 TempCSS = CStringSurrogate(Newmorpheme);
2117 if ( !TempParse.Contains(TempCSS))
2119 TempParse.Append(Newmorpheme);
2124 if ( TempParse.Size() ==0)
2126 QMessageBox::information(NULL, "Debug", "TempParse Size should not be 0", "OK");
2127 return;
2130 // If this is the first column, we put "#" back
2131 if ( MoveWhichColumn == 1)
2133 CParse SwapParse;
2134 int Swapi;
2135 QString Swappiece;
2137 for ( Swapi = 1; Swapi <= TempParse.Size(); Swapi++)
2139 Swappiece = TempParse.GetPiece(Swapi).Display();
2141 if ( Swappiece == QString("NULL"))
2143 Swappiece = QString("#");
2145 else
2147 Swappiece = QString("#") + Swappiece;
2150 SwapParse.Append(Swappiece);
2153 TempParse = SwapParse ;
2156 (*LeftColumn) = TempParse;
2161 // Check through this template, delete those column, which only has "NULL" or "#"
2162 int Newi;
2163 bool DeleteFirstColumn;
2166 DeleteFirstColumn = false;
2167 Newi = 0;
2168 for ( i = 0; i < m_NumberOfColumns; i++)
2170 OneColumn = m_Columns[i];
2172 if (( OneColumn ->Size() ==1 )&&(OneColumn ->GetPiece(1).Display() == QString("#")))
2174 DeleteFirstColumn = true;
2175 continue;
2178 if ( ( OneColumn ->Size() ==1 )&&(OneColumn ->GetPiece(1).Display() == QString("NULL")))
2180 continue;
2183 if ( DeleteFirstColumn && (Newi == 0))
2185 CParse SwapParse;
2186 int Swapi;
2187 QString Swappiece;
2189 for ( Swapi = 1; Swapi <= OneColumn ->Size(); Swapi++)
2191 Swappiece = OneColumn ->GetPiece(Swapi).Display();
2193 if ( Swappiece == QString("NULL"))
2195 Swappiece = QString("#");
2197 else
2199 Swappiece = QString("#") + Swappiece;
2202 SwapParse.Append(Swappiece);
2205 (*OneColumn) = SwapParse ;
2209 m_Columns[Newi] = OneColumn;
2210 Newi++;
2213 m_NumberOfColumns = Newi;
2216 // Print Result Templates
2217 if (PrintChangedTemplates)
2219 QFile file( "AdjustedTemplates.txt" );
2221 if ( !file.open( QIODevice::WriteOnly | QIODevice::Append) )
2223 QMessageBox::information(NULL, "Error", "Can't Open the file!", "OK");
2224 return;
2227 Q3TextStream outf( &file );
2229 //outf.open ("AdjustedTemplates.txt", ofstream::out | ofstream::app);
2231 outf << " Adjusted Template:" << endl;
2232 outf << " ";
2234 for ( i = 0; i < m_NumberOfColumns; i++)
2236 OneColumn = m_Columns[i];
2238 DisplayOfOneColumn = QString("{ ");
2239 for (int j = 1; j <= OneColumn ->Size(); j++)
2241 DisplayOfOneColumn += OneColumn ->GetPiece(j).Display();
2242 if ( j != OneColumn ->Size())
2244 DisplayOfOneColumn += QString(" , ");
2248 DisplayOfOneColumn += QString(" }");
2249 outf << DisplayOfOneColumn;
2250 if ( i != m_NumberOfColumns -1)
2252 outf << " --> ";
2256 outf << endl;
2258 outf << "The Diff in Pointer is " << BestDiff << ", The StickNess Diff is " << BestStickNessDiff <<".";
2260 outf << endl;
2261 outf << endl;
2262 outf << endl;
2264 file.close();
2270 else
2272 QFile file( "AdjustedTemplates.txt" );
2274 if ( !file.open( QIODevice::WriteOnly | QIODevice::Append ) )
2276 QMessageBox::information(NULL, "Error", "Can't Open the file!", "OK");
2277 return;
2280 Q3TextStream outf( &file );
2282 outf<< "No Change for this template !" <<endl;
2284 file.close();
2291 namespace {
2292 QString omit_first_letter(const QString& morpheme)
2294 Q_ASSERT(morpheme != TheStringNULL);
2295 Q_ASSERT(morpheme.size() >= 1);
2297 if (morpheme.size() == 1)
2298 return TheStringNULL;
2299 else
2300 return morpheme.mid(1);
2304 void CTemplate::AdjustMeAfterAbsorb1(QMap<QString, int>& AbsorbedWords,
2305 bool Conservative, int MiniMumPrefixOrSuffixNeedToBeAbsorbed, int Loopi)
2307 const bool OpenPrint = true;
2309 int StemSize = 0;
2310 int StemColumn = -1;
2311 for (int i = 0; i < m_NumberOfColumns; ++i) {
2312 CParse& column = *m_Columns[i];
2314 if (column.Size() > StemSize) {
2315 StemSize = column.Size();
2316 StemColumn = i;
2320 Q_ASSERT(StemSize >= 0);
2321 Q_ASSERT(StemColumn >= 0 && StemColumn < m_NumberOfColumns);
2322 Q_ASSERT(m_NumberOfColumns == 2 || m_NumberOfColumns == 3);
2324 // We don't consider the case where Stem is in middle
2325 if (m_NumberOfColumns == 3 && StemColumn == 1)
2326 return;
2328 Q_ASSERT(StemColumn == 0 || StemColumn == 1 || StemColumn == 2);
2330 if (StemColumn == 1)
2331 // Already returned if number of columns == 3.
2332 Q_ASSERT(m_NumberOfColumns == 2);
2334 if (StemColumn == 2)
2335 // it only can be 3-column
2336 Q_ASSERT(m_NumberOfColumns == 3);
2338 // values are all 1. XXX. use std::set.
2339 QMap<QString, int> affix_set;
2341 if (m_NumberOfColumns == 3) {
2342 Q_ASSERT(StemColumn == 0);
2343 GetWordsFromTwoColums(affix_set, 1);
2344 } else {
2345 Q_ASSERT(m_NumberOfColumns == 2);
2347 CParse& column = *m_Columns[1 - StemColumn];
2348 for (int row = 1; row <= column.Size(); ++row) {
2349 QString affix = column.GetPiece(row).Display();
2350 if (StemColumn == 1)
2351 affix = omit_first_letter(affix);
2353 affix_set[affix] = 1;
2357 // With these affixes, check the stem column
2358 CParse& column = *m_Columns[StemColumn];
2359 CParse TempParse;
2360 QMap<QString, int> AbsorbedStems;
2361 int AbsorbedStemSize = 0;
2362 for (int row = 1; row <= column.Size(); ++row) {
2363 QString stem = column.GetPiece(row).Display();
2364 if (StemColumn == 0)
2365 stem = omit_first_letter(stem);
2367 int FitHowManyPrefixOrSuffix = 0;
2368 for (QMap<QString, int>::const_iterator iter =
2369 affix_set.constBegin();
2370 iter != affix_set.constEnd();
2371 ++iter) {
2372 QString affix = iter.key();
2373 Q_ASSERT(iter.value() == 1);
2375 QString MakeUpWord;
2376 if (stem != TheStringNULL)
2377 MakeUpWord.append(stem);
2379 if (affix != TheStringNULL) {
2380 if (StemColumn == 0) {
2381 MakeUpWord.append(affix);
2382 } else {
2383 Q_ASSERT(StemColumn ==
2384 m_NumberOfColumns - 1);
2385 MakeUpWord.prepend(affix);
2389 if (AbsorbedWords.contains(MakeUpWord))
2390 ++FitHowManyPrefixOrSuffix;
2393 int CheckLimit = affix_set.size();
2394 if (Conservative == 0)
2395 CheckLimit = std::min(CheckLimit,
2396 MiniMumPrefixOrSuffixNeedToBeAbsorbed);
2398 if (FitHowManyPrefixOrSuffix < CheckLimit) {
2399 TempParse.Append(CStringSurrogate(stem));
2400 } else {
2401 ++AbsorbedStemSize;
2402 AbsorbedStems[stem] = 1;
2406 if (AbsorbedStemSize == StemSize)
2407 m_IsDeleted = true;
2408 else if (AbsorbedStemSize > 0)
2409 std::swap(*(m_Columns[StemColumn]), TempParse);
2410 else
2411 return;
2413 if (!OpenPrint)
2414 return;
2416 QFile file("Absorb1.txt");
2417 if (!file.open(QIODevice::WriteOnly | QIODevice::Append)) {
2418 QMessageBox::information(0, "Error",
2419 "Can't Open the file!", "OK");
2420 return;
2422 QTextStream outf(&file);
2424 outf << "*************" << Loopi << "**************" << endl;
2425 if (AbsorbedStemSize == StemSize) {
2426 outf << "\tDelete This Template:" << endl;
2427 } else {
2428 Q_ASSERT(AbsorbedStemSize > 0);
2429 // Took some stems away from one template
2430 outf << "\tAbsorb Some Stems From This Template:" <<
2431 endl;
2433 outf << "\t\t";
2435 for (int i = 0; i < m_NumberOfColumns; ++i) {
2436 CParse& column = i != StemColumn
2437 ? *m_Columns[i]
2438 : TempParse; // old *m_Columns[i]
2440 outf << "{ ";
2441 for (int j = 1; j <= column.Size(); ++j) {
2442 outf << column.GetPiece(j).Display();
2443 if (j == column.Size())
2444 break;
2445 outf << " , ";
2447 outf << " }";
2449 if (i == m_NumberOfColumns - 1)
2450 break;
2451 outf << " --> ";
2453 outf << endl;
2455 if (AbsorbedStemSize != StemSize) {
2456 Q_ASSERT(AbsorbedStemSize > 0);
2457 outf << "Absorbed Stems: ";
2459 outf << "{ ";
2460 if (!AbsorbedStems.isEmpty()) {
2461 QMap<QString, int>::const_iterator iter =
2462 AbsorbedStems.constBegin();
2464 outf << iter.key(); // first stem
2465 Q_ASSERT(iter.value() == 1);
2467 for (++iter; iter != AbsorbedStems.constEnd();
2468 ++iter) {
2469 outf << " , " << iter.key();
2470 Q_ASSERT(iter.value() == 1);
2473 outf << " }" << endl;
2475 outf << endl;
2478 void CTemplate::GetWordsFromTwoColums(StringToInt& AllPrefixes, int StartColumn)
2481 int CurrentSize ;
2482 int Column;
2483 int row;
2484 QString TheWord, OneWord, NewWord, DumpWord;
2485 StringToInt::iterator StringToIntIt;
2486 bool IsPreNull;
2487 StringToInt TempWords;
2490 if ( GetNumberOfColumns() < StartColumn +2)
2492 QMessageBox::information(NULL, "Error", "Impossible Here So far. Please Let Yu Hu Debug it!", "OK") ;
2493 return;
2497 AllPrefixes.clear();
2499 CurrentSize = (int) m_Columns[StartColumn]->Size();
2500 for ( row= 0; row<CurrentSize; row++)
2502 TheWord = m_Columns[StartColumn]->GetPiece(row+1).Display();
2504 if ( StartColumn == 0)
2506 TheWord = TheWord.right((TheWord.length()-1));
2507 if ( TheWord.length() ==0)
2509 TheWord = QString("NULL");
2513 TempWords.insert(TheWord,1);
2517 Column = StartColumn + 1;
2519 CurrentSize = (int) m_Columns[Column]->Size();
2521 for ( StringToIntIt = TempWords.begin(); StringToIntIt != TempWords.end(); StringToIntIt++)
2523 TheWord = StringToIntIt.key();
2525 // It only works for first col;
2526 if (TheWord == QString("NULL"))
2528 IsPreNull = true;
2530 else
2532 IsPreNull = false;
2535 for ( row= 0; row<CurrentSize; row++)
2537 OneWord = m_Columns[Column]->GetPiece(row+1).Display();
2539 if ( OneWord == QString("NULL"))
2541 // possible We get "NULL" + "NULL"
2542 NewWord = TheWord;
2544 else
2546 if (! IsPreNull)
2548 NewWord = TheWord + OneWord;
2550 else
2552 NewWord = OneWord;
2557 if (! AllPrefixes.contains(NewWord))
2559 AllPrefixes.insert(NewWord,1);
2561 else
2563 QMessageBox::information(NULL, "Error" ,"Impossible Here. Please Let Yu Hu Debug it!", "OK") ;
// Repeatedly splits a column of this template in two wherever one of its
// morphemes is a strictly shorter prefix or suffix of another morpheme in
// the same column, then appends a before/after report to
// "PrefixOrSuffixMorphemeAdjust.txt" if anything was changed.
//
// Loopi                    only written into the report header.
// MaximumSizeOfStemColumn  columns with more pieces than this are skipped
//                          (as are columns with exactly one piece).
// GlobalNodeStickNess2, TotalGlobalStickNess2, GlobalStickNess2, TotalWord2
//                          forwarded, in this order, to
//                          GetRobustNessWithParadigmaticGraph().
//
// NOTE(review): the robustness difference is computed, but the split is
// applied unconditionally (`if (1)`), so the last four arguments do not
// currently influence which splits are made.
2576 void CTemplate::FindMorphemePrefixOrSuffixWithParadigmaticGraph(int Loopi, int MaximumSizeOfStemColumn, StringToFloat& GlobalNodeStickNess2, StringToStringToFloat& TotalGlobalStickNess2, float GlobalStickNess2, float TotalWord2)
2579 CParse* oneColumn;
2580 int i, itemi, itemj;
2581 QString Morphemei, Morphemej;
2582 bool IsMePrefixOfOthers, IsMeSuffixOfOthers;
2583 QString ThePrefixOrSuffix;
2584 bool FoundAndModified;
2585 bool HasEverChanged;
2586 CTemplate* BackUpTemplate;
2587 CStringSurrogate TempCSS, TempCSS1;
2588 CStringSurrogate NULLCSS;
2591 NULLCSS = CStringSurrogate(QString("NULL"));
// Copy of the template before any split; used for the report at the end.
2595 BackUpTemplate = new CTemplate(*this);
2598 HasEverChanged = false;
2599 FoundAndModified = true;
// Keep scanning for as long as the previous pass modified the template.
2601 while( FoundAndModified )
2604 FoundAndModified = false;
2605 for ( i = 0; i < m_NumberOfColumns; i++)
2607 oneColumn = m_Columns[i];
2609 if ( oneColumn ->Size() > MaximumSizeOfStemColumn)
2611 continue;
2614 if ( oneColumn ->Size() == 1)
2616 continue;
// Candidate affix: each piece of the column in turn.
2620 for ( itemi = 1; itemi <= oneColumn ->Size(); itemi++)
2623 Morphemei = oneColumn ->GetPiece(itemi).Display();
// Pieces of column 0 are compared without their first character
// (the code below puts a "#" back in front when it rewrites column 0).
2625 if ( i ==0)
2627 Morphemei = Morphemei.right(Morphemei.length() -1);
2628 if ( Morphemei.length() ==0)
2630 Morphemei = QString("NULL");
2631 continue;
2632 // NULL could not be any prefix or suffix of other morphemes
2637 if ( Morphemei == QString("NULL"))
2639 continue;
2640 // NULL could not be any prefix or suffix of other morphemes
2643 if ( Morphemei.length() <= 1)
2645 continue;
2646 // too short morpheme is suspicious for prefix and suffix of others
2649 ThePrefixOrSuffix = Morphemei; // Morphemei could be others' prefix or suffix
2651 // Check through the other morpheme to see whether this morphemei could be prefix
2652 IsMePrefixOfOthers = false;
2653 for ( itemj= 1; itemj <= oneColumn ->Size(); itemj++)
2655 Morphemej = oneColumn ->GetPiece(itemj).Display();
2657 if ( i ==0)
2659 Morphemej = Morphemej.right(Morphemej.length() -1);
2660 if ( Morphemej.length() ==0)
2662 Morphemej = QString("NULL");
2665 continue;
// Only a strictly longer piece can have Morphemei as a prefix.
2669 if ( Morphemej.length() <= Morphemei.length())
2671 continue;
2674 if ( Morphemej.left(Morphemei.length()) != Morphemei)
2676 continue;
2679 IsMePrefixOfOthers = true;
2680 break;
2683 // If found morphemei is prefix of at least one other morpheme
2684 if ( IsMePrefixOfOthers)
2686 CParse OriginalColumn, NewColumnOne, NewColumnTwo;
2687 CParse* ReplaceFirstColumn;
2688 int Originali;
2689 QString oneOriginalMoprheme, oneNewMorpheme;
2690 float OlderRobustNess, NewRobustNess;
2691 float RobustNessDiff;
2692 int Replacei;
2693 CParse** TempColumns;
2694 CParse* ReplaceOneColumn;
2695 QString Swappiece;
2696 CParse SwapParse;
2698 // Try to get the NewColumnOne and NewColumnTwo
// Pieces that do not start with the prefix go to NewColumnOne unchanged;
// for pieces that do, the remainder after the prefix goes to NewColumnTwo.
2699 for ( Originali = 1; Originali<= oneColumn ->Size(); Originali++)
2701 oneOriginalMoprheme = oneColumn ->GetPiece(Originali).Display();
2703 if ( i ==0)
2705 oneOriginalMoprheme = oneOriginalMoprheme.right(oneOriginalMoprheme.length() -1);
2707 if ( oneOriginalMoprheme.length() ==0)
2709 oneOriginalMoprheme = QString("NULL");
2714 TempCSS = CStringSurrogate(oneOriginalMoprheme);
2715 OriginalColumn.Append(TempCSS);
2717 if ( oneOriginalMoprheme == QString("NULL"))
2720 NewColumnOne.Append(TempCSS);
2721 continue;
2724 if ( oneOriginalMoprheme.length() <= ThePrefixOrSuffix.length())
2726 NewColumnOne.Append(TempCSS);
2727 continue;
2730 if ( oneOriginalMoprheme.left(ThePrefixOrSuffix.length()) != ThePrefixOrSuffix)
2732 NewColumnOne.Append(TempCSS);
2733 continue;
2736 if (oneOriginalMoprheme.left(ThePrefixOrSuffix.length()) == ThePrefixOrSuffix)
2738 oneNewMorpheme = oneOriginalMoprheme.right(oneOriginalMoprheme.length() - ThePrefixOrSuffix.length());
2740 TempCSS1 = CStringSurrogate(oneNewMorpheme);
2742 NewColumnTwo.Append(TempCSS1);
// A "NULL" piece is also added to NewColumnTwo.
2747 NewColumnTwo.Append(NULLCSS);
2749 // Get Original Robustness
2750 OlderRobustNess = GetRobustNessWithParadigmaticGraph(OriginalColumn, GlobalNodeStickNess2, TotalGlobalStickNess2, GlobalStickNess2, TotalWord2);
2752 // Calculate the alternative robustness to do
2753 NewRobustNess = GetRobustNessWithParadigmaticGraph(NewColumnOne, GlobalNodeStickNess2, TotalGlobalStickNess2, GlobalStickNess2, TotalWord2);
2754 NewRobustNess += GetRobustNessWithParadigmaticGraph(NewColumnTwo, GlobalNodeStickNess2, TotalGlobalStickNess2, GlobalStickNess2, TotalWord2);
2756 // If Diff satisfy the condition, we make the modification, then break;
2757 RobustNessDiff = NewRobustNess - OlderRobustNess;
// NOTE(review): the RobustNessDiff test is commented out, so the split is
// always applied and RobustNessDiff is otherwise unused.
2759 if (1) // RobustNessDiff < 0)
2762 // Change the template by replacing the two columns for the original one
2763 TempColumns = new CParse*[ m_NumberOfColumns + 1];
// Walk the old columns from last to first: columns after i shift one slot
// to the right, column i is replaced by the two new columns, earlier
// columns keep their slot.
2765 for ( Replacei = m_NumberOfColumns -1; Replacei >=0; Replacei--)
2767 ReplaceOneColumn = m_Columns[Replacei];
2769 if ( Replacei > i)
2771 TempColumns[Replacei+1] = ReplaceOneColumn;
2774 if ( Replacei == i)
2776 TempColumns[Replacei+1] = new CParse(NewColumnTwo);
2777 TempColumns[Replacei] = new CParse(NewColumnOne);
2778 delete m_Columns[Replacei];
2781 if ( Replacei < i)
2783 TempColumns[Replacei] = ReplaceOneColumn;
// NOTE(review): m_Columns is allocated with new[] (in the constructors and
// as TempColumns above); delete[] would be the matching form.
2788 delete m_Columns;
2789 m_Columns = TempColumns;
2790 m_NumberOfColumns++;
2794 // If necessary, Put back the "#" back for the first column
2795 if ( i ==0)
2798 ReplaceFirstColumn = m_Columns[0];
2799 for ( Replacei = 1; Replacei <= ReplaceFirstColumn ->Size(); Replacei++)
2801 Swappiece = ReplaceFirstColumn ->GetPiece(Replacei).Display();
2803 if ( Swappiece == QString("NULL"))
2805 Swappiece = QString("#");
2807 else
2809 Swappiece = QString("#") + Swappiece;
2812 TempCSS = CStringSurrogate(Swappiece);
2813 SwapParse.Append(TempCSS);
2816 (*ReplaceFirstColumn)= SwapParse ;
2819 FoundAndModified = true;
2820 HasEverChanged = true;
2821 break;
2827 // Check through the other morpheme to see whether this morphemei could be suffix
2828 IsMeSuffixOfOthers = false;
2829 for ( itemj= 1; itemj <= oneColumn ->Size(); itemj++)
2831 Morphemej = oneColumn ->GetPiece(itemj).Display();
2833 if ( i ==0)
2835 Morphemej = Morphemej.right(Morphemej.length() -1);
2836 if ( Morphemej.length() ==0)
2838 Morphemej = QString("NULL");
2841 continue;
// Only a strictly longer piece can have Morphemei as a suffix.
2845 if ( Morphemej.length() <= Morphemei.length())
2847 continue;
2850 if ( Morphemej.right(Morphemei.length()) != Morphemei)
2852 continue;
2855 IsMeSuffixOfOthers = true;
2856 break;
2859 // If found morphemei is suffix of at least one other morpheme
2860 if ( IsMeSuffixOfOthers)
2862 CParse OriginalColumn, NewColumnOne, NewColumnTwo;
2863 CParse* ReplaceFirstColumn;
2864 int Originali;
2865 QString oneOriginalMoprheme, oneNewMorpheme;
2866 float OlderRobustNess, NewRobustNess;
2867 float RobustNessDiff;
2868 int Replacei;
2869 CParse** TempColumns;
2870 CParse* ReplaceOneColumn;
2871 QString Swappiece;
2872 CParse SwapParse;
2874 // Try to get the NewColumnOne and NewColumnTwo
// Mirror image of the prefix case: pieces that do not end with the suffix
// go to NewColumnTwo unchanged; for pieces that do, the part before the
// suffix goes to NewColumnOne.
2875 for ( Originali = 1; Originali<= oneColumn ->Size(); Originali++)
2877 oneOriginalMoprheme = oneColumn ->GetPiece(Originali).Display();
2879 if ( i ==0)
2881 oneOriginalMoprheme = oneOriginalMoprheme.right(oneOriginalMoprheme.length() -1);
2883 if ( oneOriginalMoprheme.length() ==0)
2885 oneOriginalMoprheme = QString("NULL");
2889 TempCSS = CStringSurrogate(oneOriginalMoprheme);
2890 OriginalColumn.Append(TempCSS);
2892 if ( oneOriginalMoprheme == QString("NULL"))
2894 NewColumnTwo.Append(TempCSS);
2895 continue;
2898 if ( oneOriginalMoprheme.length() <= ThePrefixOrSuffix.length())
2900 NewColumnTwo.Append(TempCSS);
2901 continue;
2904 if ( oneOriginalMoprheme.right(ThePrefixOrSuffix.length()) != ThePrefixOrSuffix)
2906 NewColumnTwo.Append(TempCSS);
2907 continue;
2910 if (oneOriginalMoprheme.right(ThePrefixOrSuffix.length()) == ThePrefixOrSuffix)
2912 oneNewMorpheme = oneOriginalMoprheme.left(oneOriginalMoprheme.length() - ThePrefixOrSuffix.length());
2914 TempCSS1 = CStringSurrogate(oneNewMorpheme);
2915 NewColumnOne.Append(TempCSS1);
// A "NULL" piece is also added to NewColumnOne.
2919 NewColumnOne.Append(NULLCSS);
2921 // Get Original Robustness
2922 OlderRobustNess = GetRobustNessWithParadigmaticGraph(OriginalColumn, GlobalNodeStickNess2, TotalGlobalStickNess2, GlobalStickNess2, TotalWord2);
2924 // Calculate the alternative robustness
2925 NewRobustNess = GetRobustNessWithParadigmaticGraph(NewColumnOne, GlobalNodeStickNess2, TotalGlobalStickNess2, GlobalStickNess2, TotalWord2);
2926 NewRobustNess += GetRobustNessWithParadigmaticGraph(NewColumnTwo, GlobalNodeStickNess2, TotalGlobalStickNess2, GlobalStickNess2, TotalWord2);
2928 // If Diff satisfy the condition, we make the modification, then break;
2929 RobustNessDiff = NewRobustNess - OlderRobustNess;
// NOTE(review): as in the prefix case, the RobustNessDiff test is disabled.
2931 if (1) //RobustNessDiff < 0)
2934 // Change the template by replacing the two columns for the original one
2935 TempColumns = new CParse*[ m_NumberOfColumns + 1];
2937 for ( Replacei = m_NumberOfColumns -1; Replacei >=0; Replacei--)
2939 ReplaceOneColumn = m_Columns[Replacei];
2941 if ( Replacei > i)
2943 TempColumns[Replacei+1] = ReplaceOneColumn;
2946 if ( Replacei == i)
2948 TempColumns[Replacei+1] = new CParse(NewColumnTwo);
2949 TempColumns[Replacei] = new CParse(NewColumnOne);
2950 delete m_Columns[Replacei];
2953 if ( Replacei < i)
2955 TempColumns[Replacei] = ReplaceOneColumn;
// NOTE(review): m_Columns is allocated with new[]; delete[] would be the
// matching form.
2960 delete m_Columns;
2961 m_Columns = TempColumns;
2962 m_NumberOfColumns++;
2966 // if necessary, Put back the "#" back for the first column
2968 if ( i == 0)
2971 ReplaceFirstColumn = m_Columns[0];
2972 for ( Replacei = 1; Replacei <= ReplaceFirstColumn ->Size(); Replacei++)
2974 Swappiece = ReplaceFirstColumn ->GetPiece(Replacei).Display();
2976 if ( Swappiece == QString("NULL"))
2978 Swappiece = QString("#");
2980 else
2982 Swappiece = QString("#") + Swappiece;
2985 TempCSS = CStringSurrogate(Swappiece);
2986 SwapParse.Append(TempCSS);
2989 (*ReplaceFirstColumn)= SwapParse ;
2992 FoundAndModified = true;
2993 HasEverChanged = true;
2994 break;
3002 if ( FoundAndModified) break;
3007 }// According to FoundAndModified, while loop;
3010 // If this template is modified, output to logf
3011 if ( HasEverChanged)
3013 StringToParse OriginalWords, NewWords;
3014 StringToParse::iterator StringToParseIt;
3015 StringToInt NewCreatedWords;
3016 StringToInt::iterator StringToIntIt;
3017 QString TempWordString;
3018 CParse* dummyParse;
3019 QString DisplayOfOneColumn;
3022 QFile file( "PrefixOrSuffixMorphemeAdjust.txt" );
3024 if ( !file.open( QIODevice::WriteOnly | QIODevice::Append) )
3026 QMessageBox::information(NULL, "Error", "Can't Open the file!", "OK");
// NOTE(review): returning here skips the `delete BackUpTemplate` at the end
// of the function.
3027 return;
3030 Q3TextStream outf( &file );
3035 BackUpTemplate ->GetWordsAndParses(OriginalWords);
// Only the keys of OriginalWords are consulted below (via contains());
// the mapped parses are deleted right away.
3038 for ( StringToParseIt = OriginalWords.begin(); StringToParseIt != OriginalWords.end(); StringToParseIt++)
3040 dummyParse = StringToParseIt.data();
3041 delete dummyParse;
3044 this ->GetWordsAndParses(NewWords);
// Collect the words of the modified template that the original did not have.
3046 for ( StringToParseIt = NewWords.begin(); StringToParseIt != NewWords.end(); StringToParseIt++)
3048 dummyParse = StringToParseIt.data();
3049 TempWordString = StringToParseIt.key();
3051 delete dummyParse;
3053 if ( !OriginalWords.contains(TempWordString))
3055 NewCreatedWords.insert(TempWordString, NULL);
3062 outf << "***********"<<Loopi<<"************" <<endl;
3063 outf << " Original Template:" << endl;
3064 outf << " ";
3067 for ( i = 0; i < BackUpTemplate ->m_NumberOfColumns; i++)
3069 oneColumn = BackUpTemplate ->m_Columns[i];
3071 DisplayOfOneColumn = QString("{ ");
3072 for (int j = 1; j <= oneColumn ->Size();j++)
3074 DisplayOfOneColumn += oneColumn ->GetPiece(j).Display();
3075 if ( j != oneColumn ->Size())
3077 DisplayOfOneColumn += QString(" , ");
3081 DisplayOfOneColumn += QString(" }");
3082 outf << DisplayOfOneColumn;
// NOTE(review): this compares against this template's column count although
// the loop walks BackUpTemplate's columns, so the separator is also written
// after the backup's last column.
3083 if ( i != m_NumberOfColumns -1)
3085 outf << " --> ";
3090 outf << endl;
3091 outf << endl;
3093 outf << " New Template:" << endl;
3094 outf << " ";
3096 for ( i = 0; i < m_NumberOfColumns; i++)
3098 oneColumn = m_Columns[i];
3100 DisplayOfOneColumn = QString("{ ");
3101 for (int j = 1; j <= oneColumn ->Size();j++)
3103 DisplayOfOneColumn += oneColumn ->GetPiece(j).Display();
3104 if ( j != oneColumn ->Size())
3106 DisplayOfOneColumn += QString(" , ");
3110 DisplayOfOneColumn += QString(" }");
3111 outf << DisplayOfOneColumn;
3112 if ( i != m_NumberOfColumns -1)
3114 outf << " --> ";
3118 outf << endl;
3119 outf << endl;
3121 outf << " New Created Words:" << endl;
3122 outf << " { ";
3124 for ( StringToIntIt = NewCreatedWords.begin(); StringToIntIt != NewCreatedWords.end(); StringToIntIt++)
3126 TempWordString = StringToIntIt.key();
3127 outf << TempWordString << " , ";
3130 outf << " }"<< endl;
3131 outf << endl;
3132 outf << endl;
3133 outf << endl;
3135 file.close();
3140 delete BackUpTemplate;
3146 float CTemplate::GetRobustNessWithParadigmaticGraph(CParse& oneColumn, StringToFloat& MorphemeAndItsComplexity, StringToStringToFloat& GlobalStickNess, float TotalGlobalStickNess, float TotalWords)
3150 int i, j;
3151 QString oneMorpheme, anotherMorpheme;
3152 QString HostMorpheme, SlaveMorpheme;
3153 float NodeRobustNess;
3154 float EdgeRobustNess;
3155 float oneFloat;
3156 StringToFloat* oneCollection;
3159 NodeRobustNess =0.0;
3160 EdgeRobustNess = 0.0;
3163 for ( i=1; i<= oneColumn.Size(); i++)
3165 oneMorpheme = oneColumn.GetPiece(i).Display();
3167 if (MorphemeAndItsComplexity.contains(oneMorpheme))
3169 oneFloat = MorphemeAndItsComplexity[oneMorpheme];
3170 NodeRobustNess += oneFloat;
3172 else
3174 NodeRobustNess += (-base2log(1.0/TotalWords)); // Big Penalty
3178 for ( j =i+1; j<= oneColumn.Size(); j++)
3180 anotherMorpheme = oneColumn.GetPiece(j).Display();
3182 if ( oneMorpheme > anotherMorpheme )
3184 if ( GlobalStickNess.contains(oneMorpheme))
3186 oneCollection = GlobalStickNess[oneMorpheme];
3188 if ( oneCollection ->contains(anotherMorpheme))
3190 oneFloat = (*oneCollection)[anotherMorpheme];
3191 EdgeRobustNess += oneFloat;
3193 else
3195 EdgeRobustNess += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
3200 else
3202 EdgeRobustNess += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
3205 else
3207 if ( GlobalStickNess.contains(anotherMorpheme))
3209 oneCollection = GlobalStickNess[anotherMorpheme];
3211 if ( oneCollection ->contains(oneMorpheme))
3213 oneFloat = (*oneCollection)[oneMorpheme];
3214 EdgeRobustNess += oneFloat;
3216 else
3218 EdgeRobustNess += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
3223 else
3225 EdgeRobustNess += -base2log(1.0/TotalGlobalStickNess); // Big Penalty
3234 return (NodeRobustNess + EdgeRobustNess);
3245 // Answer the Q: does this template describe prefixes, or suffixes?
3246 eAffixationType CTemplate::DetermineAffixationSide()
3248 float threshold = 2;
3249 int MinimumNumberOfStems = 3;
3250 int ColumnNumber = 0;
3251 int MinimumStemLength = 4;
3253 if ( m_NumberOfColumns != 2 ) return TYPE_Unknown;
3255 //#############################################################/
3256 // initial templates have only 1 column with more than 1 entry; deal with them first
3257 int NumberOfColumnsWithMoreThan1Entry = 0;
3258 for (int i = 0; i < m_NumberOfColumns; i++)
3259 { if ( m_Columns[i]->Size() > 1 )
3260 NumberOfColumnsWithMoreThan1Entry ++;
3261 ColumnNumber = i;
3263 //###########################################################//
3264 if ( NumberOfColumnsWithMoreThan1Entry == 1 )
3266 int OtherColumn = 2 - ColumnNumber;
3268 //###########################################################//
3269 else
3273 // We consider regular templates, with more than one column containing more than 1 entry
3274 float N1 = (float) m_Columns[0]->GetLength();
3275 float N2 = (float) m_Columns[1]->GetLength();
3278 if ( N1/N2 > threshold && m_Columns[0]->Size() >= MinimumNumberOfStems )
3280 return TYPE_Suffix;
3282 if ( N2/N1 > threshold && m_Columns[1]->Size() >= MinimumNumberOfStems)
3284 return TYPE_Prefix;
3286 return TYPE_Unknown;
3289 //#########################################################//
3291 return TYPE_Unknown;
// Feeds this template's two columns into the given stem, affix and signature
// collections, according to ThisType:
//   TYPE_Prefix : column 0 supplies the affixes/signature, column 1 the stems
//   TYPE_Suffix : column 1 supplies the affixes/signature, column 0 the stems
//   other       : nothing is added
// Each stem is appended to the signature's stem list and is pointed back at
// the signature.
// NOTE(review): brace-only lines are absent from this listing, so the exact
// nesting of the loops below cannot be confirmed from here.
3294 void CTemplate::AddToCollections ( eAffixationType ThisType, CStemCollection* Stems, CAffixCollection* Affixes, CSignatureCollection* Signatures)
3297 CStem* pStem;
3298 CSignature* pSig,
3299 *qSig;
3300 CParse Sig1, Sig2;
3301 CStringSurrogate ssPiece;
3302 CString WordBoundary("#"),
3303 Null ("NULL");
3304 // Right now, there is a spurious '#' at the beginning of words, so we have to
3305 // remove this from each prefix and prefix signature. The # should be removed,
3306 // and this code simplified, so that prefixes are just like suffixes.
3308 if (ThisType == TYPE_Prefix)
3310 for (int i = 1; i <= m_Columns[0]->Size(); i++)
3312 Sig1 = m_Columns[0]->GetAt_SS(i);
// NOTE(review): this inner loop declares its own `i`, shadowing the outer one.
3313 for (int i=1; i <= Sig1.Size(); i++)
3315 ssPiece = Sig1.GetAt_SS(i);
// A bare "#" is recorded as the NULL affix.
3316 if ( ssPiece == WordBoundary )
3318 Sig2.AppendInAlphabeticalOrder (Null );
3319 ssPiece = Null;
// Otherwise a leading '#' is stripped before the piece joins the signature.
3321 else if ( ssPiece[0] == '#' )
3323 ssPiece = ssPiece.Mid(1);
3324 Sig2.AppendInAlphabeticalOrder ( ssPiece );
// NOTE(review): a piece that matches neither test above appears to reach
// Affixes without being appended to Sig2 -- confirm this is intended.
3326 *Affixes << ssPiece;
3328 qSig = *Signatures << &Sig2;
3329 qSig->SetAffixLocation ( WORD_INITIAL ); //this is probably no longer necessary; todo
// NOTE(review): `i` is not declared by this loop; it reuses the counter of an
// earlier `for (int i ...)`, which depends either on this loop being nested
// inside that one or on pre-standard for-scope rules -- confirm.
3331 for (i = 1; i <= m_Columns[1]->Size(); i++)
3333 pStem = *Stems << m_Columns[1]->GetAt_SS(i);
3334 qSig->GetStemPtrList()->AddTail(pStem);
// NOTE(review): the prefix branch calls SetSuffixSignature -- verify that a
// prefix-specific setter is not what was meant.
3335 pStem->SetSuffixSignature (qSig);
3338 return;
3340 else if ( ThisType == TYPE_Suffix)
// Column 1 as a whole becomes the signature.
3342 pSig = *Signatures << m_Columns[1];
3344 for (int i = 1; i <= m_Columns[1]->Size(); i++)
3346 *Affixes << m_Columns[1]->GetAt_SS(i);
// NOTE(review): as above, `i` here comes from the preceding loop's declaration.
3348 for (i = 1; i <= m_Columns[0]->Size(); i++)
3350 pStem = *Stems << m_Columns[0]->GetAt_SS(i);
3351 pSig->GetStemPtrList()->AddTail(pStem);
3352 pStem->SetSuffixSignature (pSig);
3356 return;
3358 return;
3368 //////////////////////////////////////////////////////////////////
////// CTemplateListViewItem Implementation Section
3371 CTemplateListViewItem::CTemplateListViewItem(Q3ListView *parent,
3372 CTemplate* templ, QString label)
3373 : Q3ListViewItem(parent, label),
3374 m_Template(templ),
3375 m_MaxMumShownMorphemes(4) { }
3377 CTemplateListViewItem::CTemplateListViewItem(Q3ListViewItem* parent,
3378 CTemplate* templ, QString label)
3379 : Q3ListViewItem(parent, label),
3380 m_Template(templ),
3381 m_MaxMumShownMorphemes(4) { }
3383 QString CTemplateListViewItem::key( int column, bool ascending ) const
3385 if(m_Template)
3387 QString QSstring = "";
3388 int NumberOfMorphemes;
3391 switch( column )
3393 case 0:
3394 return QString("%1").arg(m_Template ->m_TemplateNumber,10);
3395 case 1:
3396 return QString("%1").arg(m_Template ->m_NumberOfColumns,10);
3397 case 2:
3398 return QString("%1").arg(m_Template ->m_NewSortComplexity, 10);
3399 case 3:
3401 if (m_Template-> m_Columns[0] )
3403 NumberOfMorphemes = m_Template->m_Columns[0]->Size() ;
3405 else
3407 NumberOfMorphemes = 0;
3409 return QString("%1").arg(NumberOfMorphemes,10);
3411 case 4:
3413 if (m_Template-> m_Columns[1] )
3415 NumberOfMorphemes = m_Template->m_Columns[1]->Size() ;
3417 else
3419 NumberOfMorphemes = 0;
3421 return QString("%1").arg(NumberOfMorphemes,10);
3423 case 5:
3425 if (m_Template->m_NumberOfColumns >= 3 )
3427 NumberOfMorphemes = m_Template->m_Columns[2]->Size() ;
3429 else
3431 NumberOfMorphemes = 0;
3433 return QString("%1").arg(NumberOfMorphemes,10);
3435 case 6:
3437 if (m_Template->m_NumberOfColumns >= 4 )
3439 NumberOfMorphemes = m_Template->m_Columns[3]->Size() ;
3441 else
3443 NumberOfMorphemes = 0;
3445 return QString("%1").arg(NumberOfMorphemes,10);
3447 default:
3448 return Q3ListViewItem::text( column );
3451 else return Q3ListViewItem::key( column, ascending );
3456 QString CTemplateListViewItem::text( int column ) const
3459 if(m_Template)
3461 QString QSstring = "";
3462 int i;
3463 int NumberOfMorphemes;
3464 int NumberOfDisplayedMorphemes;
3465 bool ShowPlus;
3466 // int PrintedMorphemes=0;
3467 QString QSOneMorpheme, QSReversedOneMorpheme;
3468 CStringSurrogate SSOneMorpheme;
3469 // const char* CCDebugString1, *CCDebugString2;
3472 switch( column )
3474 case 0:
3475 return QString("%1").arg( m_Template->m_TemplateNumber);
3476 case 1:
3477 return QString("%1").arg( m_Template ->m_NumberOfColumns);
3478 case 2:
3479 return QString("%1").arg( m_Template ->m_NewSortComplexity);
3480 case 3:
3483 if ( m_Template->m_NumberOfColumns < 1)
3485 return QString("");
3488 NumberOfMorphemes = m_Template->m_Columns[0] ->Size() ;
3490 NumberOfDisplayedMorphemes = m_MaxMumShownMorphemes;
3491 ShowPlus = true;
3492 if(NumberOfMorphemes <= m_MaxMumShownMorphemes)
3494 NumberOfDisplayedMorphemes = NumberOfMorphemes;
3495 ShowPlus = false;
3498 for( i = 1; i <= NumberOfDisplayedMorphemes ; i++ )
3500 QSstring.append( m_Template->m_Columns[0] ->GetPiece(i).Display() + ", ");
3502 QSstring = QSstring.left( QSstring.length() - 2 );
3504 if(ShowPlus)
3506 QSstring += " +";
3508 return QSstring;
3510 case 4:
3512 if ( m_Template->m_NumberOfColumns < 2)
3514 return QString("");
3517 NumberOfMorphemes = m_Template->m_Columns[1] ->Size() ;
3519 NumberOfDisplayedMorphemes = m_MaxMumShownMorphemes;
3520 ShowPlus = true;
3521 if(NumberOfMorphemes <= m_MaxMumShownMorphemes)
3523 NumberOfDisplayedMorphemes = NumberOfMorphemes;
3524 ShowPlus = false;
3527 for( i = 1; i <= NumberOfDisplayedMorphemes ; i++ )
3529 QSstring.append( m_Template->m_Columns[1] ->GetPiece(i).Display() + ", ");
3531 QSstring = QSstring.left( QSstring.length() - 2 );
3533 if(ShowPlus)
3535 QSstring += " +";
3537 return QSstring;
3539 case 5:
3541 if ( m_Template->m_NumberOfColumns < 3)
3543 return QString("");
3546 NumberOfMorphemes = m_Template->m_Columns[2] ->Size() ;
3548 NumberOfDisplayedMorphemes = m_MaxMumShownMorphemes;
3549 ShowPlus = true;
3550 if(NumberOfMorphemes <= m_MaxMumShownMorphemes)
3552 NumberOfDisplayedMorphemes = NumberOfMorphemes;
3553 ShowPlus = false;
3556 for( i = 1; i <= NumberOfDisplayedMorphemes ; i++ )
3558 QSstring.append( m_Template->m_Columns[2] ->GetPiece(i).Display() + ", ");
3560 QSstring = QSstring.left( QSstring.length() - 2 );
3562 if(ShowPlus)
3564 QSstring += " +";
3566 return QSstring;
3568 case 6:
3570 if ( m_Template->m_NumberOfColumns < 4)
3572 return QString("");
3575 NumberOfMorphemes = m_Template->m_Columns[3] ->Size() ;
3577 NumberOfDisplayedMorphemes = m_MaxMumShownMorphemes;
3578 ShowPlus = true;
3579 if(NumberOfMorphemes <= m_MaxMumShownMorphemes)
3581 NumberOfDisplayedMorphemes = NumberOfMorphemes;
3582 ShowPlus = false;
3585 for( i = 1; i <= NumberOfDisplayedMorphemes ; i++ )
3587 QSstring.append( m_Template->m_Columns[3] ->GetPiece(i).Display() + ", ");
3589 QSstring = QSstring.left( QSstring.length() - 2 );
3591 if(ShowPlus)
3593 QSstring += " +";
3595 return QSstring;
3598 default:
3599 return Q3ListViewItem::text( column );
3602 else return Q3ListViewItem::text( column );