FIX for bug: Signature was not displayed in CWordListViewItem for Prefix-mini lexicon
[linguistica.git] / Allomorphy.cpp
blobb70fb3ceb69db3676855bb8fbca29e9b0b1a7130
1 // Implementation of AffixAlignment, SignatureAlignment methods
2 // Copyright © 2009 The University of Chicago
3 #include "Allomorphy.h"
5 #include <QTextStream>
6 #include <QStringList>
7 #include <QtAlgorithms>
8 #include "Signature.h"
9 #include "StringSurrogate.h"
10 #include "Parse.h"
11 #include "StringFunc.h"
12 #include "HTML.h"
13 #include "Typedefs.h"
14 //---------------------------------------------------------------------//
15 class LxStringList {
16 QStringList m_stringlist;
17 public:
18 LxStringList(CParse*);
19 void SortBySizeRightAlphabetical();
20 int size() { return m_stringlist.size(); }
21 QString at(int stringno);
22 QString operator[](int i){ return m_stringlist[i];};
23 QString LongestCommonPrefix();
24 void Remove(QString);
25 bool contains (QString string) { return m_stringlist.contains(string); }
29 //---------------------------------------------------------------------//
30 LxStringList::LxStringList(CParse* parse)
32 parse->CreateQStringList (m_stringlist);
33 for (int affixno = 0; affixno < m_stringlist.size(); affixno++)
35 if (m_stringlist[affixno]=="NULL")
36 m_stringlist[affixno].clear();
43 //---------------------------------------------------------------------//
44 // str1 is ranked before str2 if str1 is longer than str2; if they are the same length, then if it is alphabetically prior,
45 // scanning both strings from right to left.
46 bool comparefunction1(QString str1, QString str2){
47 if (str1.length() > str2.length() ) return TRUE;
48 if (str2.length() > str2.length() ) return FALSE;
49 for (int i = str1.length()-1; i >= 0 ; i--)
51 if (str1[i] < str2[i] ) return TRUE;
52 if (str1[i] > str2[i] ) return FALSE;
54 return FALSE;
56 //---------------------------------------------------------------------//
57 void LxStringList::SortBySizeRightAlphabetical()
59 qSort(m_stringlist.begin(), m_stringlist.end(), comparefunction1);
61 //---------------------------------------------------------------------//
67 //---------------------------------------------------------------------//
68 QString LxStringList::at(int stringno)
70 return m_stringlist[stringno];
73 //---------------------------------------------------------------------//
74 bool StringSubtractFromRight(QString string1, QString string2, QString& difference)
76 // If string2 is not a suffix of string1, return FALSE;
77 // Else, difference = string1.left (string1.length - string2.length )
79 if ( string1.endsWith(string2) ){
80 difference = string1.left( string1.length() - string2.length() );
81 return TRUE;
83 difference.clear();
84 return FALSE;
87 //---------------------------------------------------------------------//
88 QString LxStringList::LongestCommonPrefix()
89 { int prefixlength;
90 QString prefix = m_stringlist[0];
91 prefixlength = prefix.length();
93 for (int morphno = 1; morphno < m_stringlist.size(); morphno++)
95 QString morph = m_stringlist[morphno];
96 int morphlength = morph.length();
97 int letterno;
98 for (letterno = 0; letterno < morph.length() && letterno < prefix.length(); letterno++)
100 if ( prefix[letterno] == morph[letterno] ) continue;
101 if ( letterno == 0) { prefix.clear(); return prefix; }
103 if (letterno == morphlength-1) // so match is as good as it can get...
105 prefix = prefix.left(letterno+1);
106 prefixlength = prefix.length();
108 else if (letterno == prefixlength-1) // also, match is as good as it can get
110 continue;
112 else // we have a non-zero match with this morph; we'll note that and go on to next morph.
114 prefix = prefix.left(letterno+1);
115 prefixlength= prefix.length();
117 } // end of loop over morphs in StringList
118 return prefix;
120 //---------------------------------------------------------------------//
121 void LxStringList::Remove(QString string)
123 m_stringlist.remove(string);
128 //---------------------------------------------------------------------//
129 AffixAlignment::AffixAlignment(QString Affix1, QString Affix2)
130 : m_OriginalAffix1(), m_OriginalAffix2(),
131 m_Affix1(Affix1), m_Affix2(Affix2),
132 m_Margin1(), m_Margin2(),
133 m_Shift1(), m_Shift2(),
134 m_Status(Affix1 == Affix2 ? IDENTICAL : DIFFERENT),
135 m_Agreement_unigram(0.0),
136 m_Agreement_bigram(0.0),
137 m_Disagreement_unigram(0.0),
138 m_Disagreement_bigram(0.0) { }
140 AffixAlignment::AffixAlignment(QString Margin1, QString Affix1,
141 QString Margin2, QString Affix2)
142 : m_OriginalAffix1(), m_OriginalAffix2(),
143 m_Affix1(Affix1), m_Affix2(Affix2),
144 m_Margin1(Margin1), m_Margin2(Margin2),
145 m_Shift1(), m_Shift2(),
146 m_Status(Affix1 == Affix2 ? IDENTICAL : DIFFERENT),
147 m_Agreement_unigram(0.0),
148 m_Agreement_bigram(0.0),
149 m_Disagreement_unigram(0.0),
150 m_Disagreement_bigram(0.0) { }
151 //---------------------------------------------------------------------//
152 // Constructor
153 //---------------------------------------------------------------------//
154 CSignatureAlignment::CSignatureAlignment(CSignature* Sig1, CSignature* Sig2)
155 : m_SigPtr1(Sig1), m_SigPtr2(Sig2),
156 m_AffixAlignments(),
157 m_Sig1AlignedAffixes(), m_Sig2AlignedAffixes()
158 { LxStringList TempList1(Sig1), TempList2(Sig2);
159 int smallersize = TempList1.size();
160 if (TempList2.size() < smallersize)
162 smallersize = TempList2.size();
165 int sig1length=0, sig2length = 0;
166 for (int affixno = 0; affixno < smallersize; affixno++)
168 sig1length += TempList1[affixno].length();
169 sig2length += TempList2[affixno].length();
171 if (sig1length <= sig2length)
173 m_SigPtr1 = Sig1;
174 m_SigPtr2 = Sig2;
175 }else
177 m_SigPtr1 = Sig2;
178 m_SigPtr2 = Sig1;
181 //---------------------------------------------------------------------//
182 bool CSignatureAlignment::FindWhetherOneIsSuffixOfTheOther()
184 // First, sort both by Size and Right-side alphabetical:
186 // We don't need to keep these local variables here, now that the signatures are kept in the SigAlignment. Fix? JG.
187 LxStringList* ShorterSig, *LongerSig;
188 QString suffix1, suffix2, ShorterSuffix, LongerSuffix;
190 LxStringList SigList1 (m_SigPtr1);
191 LxStringList SigList2 (m_SigPtr2);
192 SigList1.SortBySizeRightAlphabetical();
193 SigList2.SortBySizeRightAlphabetical();
194 suffix1 = SigList1.at(0);
195 suffix2 = SigList2.at(0);
196 int size = SigList1.size();
197 if (suffix1.length() == suffix2.length()) {return FALSE;}
198 else if (suffix1.length() < suffix2.length()) {
199 LongerSig = &SigList2;
200 ShorterSig = &SigList1;
202 m_LongerSig = m_SigPtr2;
203 m_ShorterSig= m_SigPtr1;
205 LongerSuffix = suffix2;
206 ShorterSuffix = suffix1;
207 } else {
208 LongerSig = &SigList1;
209 ShorterSig = &SigList2;
211 m_LongerSig = m_SigPtr1;
212 m_ShorterSig = m_SigPtr2;
214 LongerSuffix = suffix1;
215 ShorterSuffix = suffix2;
219 bool b_diff = StringSubtractFromRight(LongerSuffix, ShorterSuffix,m_difference);
220 if (b_diff == FALSE) {
221 return FALSE;
223 // this checks that the difference between the corresponding suffixes is identical across the signature's suffixes:
224 for (int suffixno = 1; suffixno < size; suffixno++){
225 if ( LongerSig->at(suffixno) != m_difference + ShorterSig->at(suffixno) )
227 return FALSE;
231 return TRUE;
236 void CSignatureAlignment::FindBestAlignment()
239 QString Morph, OtherMorph, MarginPiece;
240 AffixAlignment* pAlign;
241 CParse Margins1, Margins2, Suffixes1, Suffixes2;
242 int morphlength, othermorphlength;
243 CSS ssMorph, ssOtherMorph;
245 LxStringList SigList1 (m_SigPtr1);
246 LxStringList SigList2 (m_SigPtr2);
247 SigList1.SortBySizeRightAlphabetical();
248 SigList2.SortBySizeRightAlphabetical();
250 QMap<QString,int> AffixesMatched1, AffixesMatched2, MarginsFound1, MarginsFound2;
253 //-------------------------------- Step 1 --------------------------------------------------------//
254 // first, look for identical affixes in the sigs;
255 for (int i = 0; i < SigList1.size(); i++)
257 Morph = SigList1[i];
258 if (AffixesMatched1.contains(Morph) ) continue;
259 for (int j = 0; j < SigList2.size(); j++)
261 OtherMorph = SigList2[j];
262 if ( AffixesMatched2.contains(OtherMorph) ) continue;
263 if (Morph==OtherMorph)
265 if (Morph.length() == 0) { continue; }
266 pAlign = new AffixAlignment (Morph, Morph);
267 m_AffixAlignments.append( pAlign );
268 AffixesMatched1 [Morph] = 1;
269 AffixesMatched2 [Morph] = 1;
270 break;
275 //-------------------------------- Step 2 --------------------------------------------------------//
276 // now look for non-identical but end-matching affix pairs, like "es/s"
277 // Don't consider any suffixes which have been identified in Step 1 above (where there was an identical homolog)
279 for (int i = 0; i < SigList1.size(); i++)
281 Morph = SigList1[i];
282 if (AffixesMatched1.contains(Morph) ) continue;
283 morphlength = Morph.length();
284 if (morphlength == 0) continue;
285 for (int j = 0; j < SigList2.size(); j++)
287 OtherMorph = SigList2[j];
288 if ( AffixesMatched2.contains(OtherMorph) ) continue;
289 othermorphlength = OtherMorph.length();
290 if (morphlength == 0) continue;
291 if ( OtherMorph.right( morphlength ) == Morph )
293 MarginPiece = OtherMorph.left(othermorphlength - morphlength);
294 pAlign = new AffixAlignment (TheStringNULL, Morph,
295 MarginPiece, Morph );
296 m_AffixAlignments.append(pAlign);
297 AffixesMatched1[Morph] = 1;
298 AffixesMatched2[OtherMorph] = 1;
299 MarginsFound1[TheStringNULL] = 1;
300 MarginsFound2[MarginPiece] = 1;
301 break;
303 if (morphlength>0 && othermorphlength > 0 && Morph.right(othermorphlength) == OtherMorph )
305 MarginPiece = Morph.left( morphlength - othermorphlength );
306 pAlign = new AffixAlignment ( MarginPiece, OtherMorph,
307 TheStringNULL, OtherMorph );
308 m_AffixAlignments.append(pAlign);
309 AffixesMatched1[Morph] = 1;
310 AffixesMatched2[OtherMorph] = 1;
311 MarginsFound1[MarginPiece] = 1;
312 MarginsFound2[TheStringNULL] =1 ;
313 break;
318 //-------------------------------- Step 3 --------------------------------------------------------//
319 // if one of the signatures has an X in its margin region, and it also has an X as an affix and the other
320 // signature has a NULL, then we can align the X and the NULL:
322 //if (m_SigPtr1->ContainsNULL())
323 if (SigList1.contains("") )
325 for (int suffixno = 0; suffixno < SigList2.size(); suffixno++)
327 Morph = SigList2[suffixno];
329 if (Morph.length() == 0) continue;
330 if ( AffixesMatched2.contains(Morph) ) continue;
331 if ( MarginsFound2.contains( Morph ) )
333 pAlign = new AffixAlignment ( TheStringNULL, TheStringNULL, Morph, TheStringNULL);
334 m_AffixAlignments.append(pAlign);
335 AffixesMatched1[""] = 1;
336 AffixesMatched2[Morph];
341 if (SigList2.contains("") && ! AffixesMatched2.contains("") )
343 for (int suffixno = 0; suffixno < SigList1.size(); suffixno++)
345 Morph = SigList1[suffixno] ;
346 if (Morph.length() == 0) continue;
347 if (AffixesMatched1.contains(Morph) ) continue;
348 if ( MarginsFound1.contains( Morph ) )
350 pAlign = new AffixAlignment ( Morph, TheStringNULL, TheStringNULL, TheStringNULL);
351 m_AffixAlignments.append(pAlign);
352 AffixesMatched2[""] = 1;
353 AffixesMatched1[Morph] = 1;
358 //-------------------------------- Step 4 --------------------------------------------------------//
360 // match NULL against NULL
361 if (SigList1.contains("") && ! AffixesMatched1.contains("") && SigList2.contains("") && !AffixesMatched2.contains("") )
363 pAlign = new AffixAlignment ( TheStringNULL, TheStringNULL, TheStringNULL, TheStringNULL);
364 m_AffixAlignments.append(pAlign);
365 //MarginsFound1[TheStringNULL] = 1 ;
366 AffixesMatched1[TheStringNULL] = 1;
367 AffixesMatched2[TheStringNULL] = 1;
374 // Deprecated: (use following function)
375 void CSignatureAlignment::Display(QTextStream& LogStream)
377 AffixAlignment* pAlign;
379 if (m_AffixAlignments.count() > 0)
381 LogStream <<
383 StartTable <<
384 StartTableRow <<
385 TableData (m_SigPtr1->Display('-')) <<
386 TableData (m_SigPtr2->Display('-')) << EndTableRow <<
387 StartTableRow <<
388 MakeTableHeader ("Shift region 1") <<
389 MakeTableHeader ("Margin region 1") <<
390 MakeTableHeader ("Affix 1") <<
391 MakeTableHeader ("Shift region 2") <<
392 MakeTableHeader ("Margin region 2") <<
393 MakeTableHeader ("Affix 2") <<
394 EndTableRow << endl;
396 for (int i = 0; i < m_AffixAlignments.size(); i++)
398 pAlign = m_AffixAlignments.at(i);
399 LogStream <<
400 StartTableRow <<
401 TableData( pAlign->GetShift1() ) <<
402 TableData( pAlign->GetMargin1()) <<
403 TableData( pAlign->GetAffix1() ) <<
404 TableData( pAlign->GetShift2() ) <<
405 TableData( pAlign->GetMargin2()) <<
406 TableData( pAlign->GetAffix2() ) <<
407 EndTableRow << endl;
412 LogStream << EndTable;
416 void CSignatureAlignment::Display(CMiniLexicon* mini)
418 AffixAlignment* pAlign;
420 if (m_AffixAlignments.count() > 0)
422 mini->LogFileStartTable();
423 mini->LogFileStartRow();
424 mini->LogFile (m_SigPtr1->Display('-'));
425 mini->LogFile (m_SigPtr2->Display('-'));
426 mini->LogFileEndRow();
427 mini->LogFileStartRow();
428 mini->LogFileHeader ("Shift region 1");
429 mini->LogFileHeader ("Margin region 1");
430 mini->LogFileHeader ("Affix 1");
431 mini->LogFileHeader ("Shift region 2");
432 mini->LogFileHeader ("Margin region 2");
433 mini->LogFileHeader ("Affix 2");
434 mini->LogFileEndRow();
437 for (int i = 0; i < m_AffixAlignments.size(); i++)
439 pAlign = m_AffixAlignments.at(i);
440 mini->LogFileStartRow();
441 mini->LogFileSimpleString( pAlign->GetShift1() );
442 mini->LogFileSimpleString( pAlign->GetMargin1() );
443 mini->LogFileSimpleString( pAlign->GetAffix1() );
444 mini->LogFileSimpleString( pAlign->GetShift2() );
445 mini->LogFileSimpleString( pAlign->GetMargin2() );
446 mini->LogFileSimpleString( pAlign->GetAffix2() );
447 mini->LogFileEndRow();