HowManyAreAnalyzed(): use status_user_agent to report progress
[linguistica.git] / MiniLexicon_CheckAffixes.cpp
blobbd1036aba15e6caafdae365a61b652f91c597d89
1 // Reconsidering discovered suffix-based morphology
2 // Copyright © 2009 The University of Chicago
3 #include "MiniLexicon.h"
5 #include <memory>
6 #include "ui/Status.h"
7 #include "DLHistory.h"
8 #include "Lexicon.h"
9 #include "Signature.h"
10 #include "Suffix.h"
11 #include "Prefix.h"
12 #include "Affix.h"
13 #include "Stem.h"
14 #include "SignatureCollection.h"
15 #include "SuffixCollection.h"
16 #include "PrefixCollection.h"
17 #include "StemCollection.h"
18 #include "WordCollection.h"
19 #include "HTML.h"
21 void CMiniLexicon::CheckSignatures() // Suffixes/Check signatures
23 int NumberOfLettersToShift = 0;
24 int InternalCount = 0;
25 int LoopCount = 0;
26 int LoopLimit = m_pLexicon->GetIntParameter( "CheckSignatures\\LoopLimit", 1 ); // 3 );
27 //int i;
28 CSignature* pSig;
29 CStem* pStem;
30 QString Null = "NULL", msg;
31 CStringSurrogate ssStem;
32 CParse PWord;
33 const int StemCountThreshold = m_pLexicon->GetIntParameter( "CheckSignatures\\StemCountThreshold", 2 );
34 CStringSurrogate ssAffix;
35 bool analyzingSuffixes = TRUE;
36 if( m_AffixLocation == STEM_INITIAL || m_AffixLocation == WORD_INITIAL ) analyzingSuffixes = FALSE;
38 std::auto_ptr<CSignatureCollection> Actions(analyzingSuffixes ?
39 new CSignatureCollection(this, m_pSuffixes, m_AffixLocation ) :
40 new CSignatureCollection(this, m_pPrefixes, m_AffixLocation ));
42 linguistica::ui::status_user_agent& status = m_pLexicon->status_display();
43 status.major_operation = QString(
44 "Mini-Lexicon %1: Check signatures: stem/suffix edge.")
45 .arg(m_Index+1);
46 status.progress.clear();
48 QHash<CSignature*, int> HowManyLettersToShift;
49 QList<CSignature*> SignaturesToModify;
51 if (analyzingSuffixes) { LogFileLargeTitle("Phase: Check signatures (stem/suffix edge"); }
52 else { LogFileLargeTitle("Phase: Check signatures (prefix/stem edge"); }
54 //======================================================================//
55 // Principal loop, through Signatures
56 //======================================================================//
58 while ( LoopCount < LoopLimit )
60 LoopCount++;
61 SignaturesToModify.clear();
62 InternalCount = 0;
63 m_pSignatures->Sort(SIGS);
66 //----------------------------------------------------------------//
67 // Call to "CheckOut" to check each signature.
68 //----------------------------------------------------------------//
70 status.progress.set_denominator(m_pSignatures->GetCount());
71 for ( int signo = 0; signo < (int)m_pSignatures->GetCount(); signo++)
73 msg = QString("%1").arg(LoopCount) + ": " + QString("%1").arg( m_pSignatures->GetCount() - signo );
74 status.details = msg;
75 status.progress = signo;
77 pSig = m_pSignatures->GetAtSort(signo);
78 pSig->SetAffixLocation( m_AffixLocation );
80 if ( pSig->GetNumberOfStems() < StemCountThreshold ) { continue; }
82 //==========================================================
83 NumberOfLettersToShift = pSig->CheckOut(this);
84 //==========================================================
86 if ( NumberOfLettersToShift > 0)
88 InternalCount ++;
89 SignaturesToModify.append(pSig);
90 HowManyLettersToShift.insert (pSig,NumberOfLettersToShift);
92 } // end of signo loop
94 if (InternalCount == 0) {
95 // There are no signatures being modified.
96 // Leave function.
97 status.details.clear();
98 // XXX. not really an operation.
99 status.major_operation = (analyzingSuffixes ?
100 QString("Mini-Lexicon %1: End of Check signatures: stem/suffix edge.") :
101 QString("Mini-Lexicon %1: End of Check signatures: prefix/stem edge."))
102 .arg(m_Index+1);
103 LogFile("No signatures to modify now");
104 return;
108 //----------------------------------------------------------------//
109 // Section *A*
110 // Now we make the changes in the words which we have identified above.
111 // Bear in mind that the (positive or negative) integer in Sig.CorpusCount is the number of
112 // letters to the right or left that the stem/suffix cut should be shifted.
114 //----------------------------------------------------------------//
116 LogFileSmallTitle("Remaking signature");
117 LogFileHeader("New signature", "Old stem", "New stem");
119 for (int signo = 0; signo < (int)Actions->GetCount(); signo++)
121 pSig = SignaturesToModify.at(signo);
122 int NumberOfLettersShifted = HowManyLettersToShift.value(pSig);
123 LogFile(pSig->Display());
124 for (int stemno = 0; stemno < pSig->GetNumberOfStems(); stemno++)
126 pStem = pSig->GetStem(stemno);
127 ssStem = pStem->GetKey();
128 LogFile("", "", ssStem.Display());
129 for (int affixno = 1; affixno <= pSig->Size(); affixno++)
131 ssAffix = pSig->GetPiece(affixno);
132 if ( ssAffix == CStringSurrogate(Null) )
134 ssAffix.MakeNull();
137 if( analyzingSuffixes ) PWord = ssStem + ssAffix;
138 else PWord = ssAffix + ssStem;
140 CStem* pWord = *m_pWords ^= PWord;
142 if( analyzingSuffixes )
144 if( ssAffix.GetLength() == 0 && pWord && pWord->GetSuffixLoc() > 0 ) continue; // the stem has an internal analysis already 3/2003
146 else
148 if( ssAffix.GetLength() == 0 && pWord && pWord->GetStemLoc() > 0 ) continue;
151 if (pWord->GetWordType() == CStem::BIWORD_COMPOUND ||
152 pWord->GetWordType() == CStem::MULTIPLE_COMPOUND ||
153 pWord->GetWordType() == CStem::POSSIBLE_COMPOUND)
154 continue;
156 Q_ASSERT ( pWord->IsValid() );
158 if ( (int)ssStem.GetLength() <= NumberOfLettersShifted ) { continue; } ;
159 // TODO: do the same thing below for prefixes if necessary
161 if( analyzingSuffixes ) pWord->ShiftStemSuffixBoundary ( -1 * NumberOfLettersShifted );
162 //else pWord->ShiftPrefixStemBoundary( pSig->GetCorpusCount() );
164 Q_ASSERT ( pWord->IsValid() );
165 LogFile(pWord->GetStem().Display());
166 }// end of affixno loop
168 LogFileEndRow();
169 } //end of signo loop
170 //----------------------------------------------------------------//
171 // End of Section *A*
173 //----------------------------------------------------------------//
174 LogFileEndTable();
175 } // end of LoopCount loop;
177 //-------------------------------------------------------------//
178 //////////////////////////////////////////////////////////////////////////////
179 // Redo Signatures
180 QString Remark ("Checking signatures");
181 CStringSurrogate ssRemark ( Remark);
182 RebuildAffixesStemsAndSignaturesFromWordSplits( ssRemark );
185 // XXX. not an operation
186 status.major_operation = (analyzingSuffixes ?
187 QString("Mini-Lexicon %1: "
188 "End of Check signatures: stem/suffix edge.") :
189 QString("Mini-Lexicon %1: "
190 "End of Check signatures: prefix/stem edge." ))
191 .arg(m_Index + 1);
192 status.progress.clear();
193 status.details.clear();
195 QString mini_name( "Mini-Lexicon %1" );
196 mini_name = mini_name.arg( GetIndex() + 1 );
197 QString remark = "Check stem/suffix cut";
198 GetDLHistory()->append( mini_name, remark, this );