HowManyAreAnalyzed(): use status_user_agent to report progress
[linguistica.git] / Allomorphy.cpp
blob62e6759fbc8ca1b7a2ffc17f55062129b9e60ac0
1 // Implementation of AffixAlignment, SignatureAlignment methods
2 // Copyright © 2009 The University of Chicago
3 #include "Allomorphy.h"
5 #include <QTextStream>
6 #include "Signature.h"
7 #include "StringSurrogate.h"
8 #include "Parse.h"
9 #include "StringFunc.h"
10 #include "HTML.h"
11 #include "Typedefs.h"
13 AffixAlignment::AffixAlignment(QString Affix1, QString Affix2)
14 : m_OriginalAffix1(), m_OriginalAffix2(),
15 m_Affix1(Affix1), m_Affix2(Affix2),
16 m_Margin1(), m_Margin2(),
17 m_Shift1(), m_Shift2(),
18 m_Status(Affix1 == Affix2 ? IDENTICAL : DIFFERENT),
19 m_Agreement_unigram(0.0),
20 m_Agreement_bigram(0.0),
21 m_Disagreement_unigram(0.0),
22 m_Disagreement_bigram(0.0) { }
24 AffixAlignment::AffixAlignment(QString Margin1, QString Affix1,
25 QString Margin2, QString Affix2)
26 : m_OriginalAffix1(), m_OriginalAffix2(),
27 m_Affix1(Affix1), m_Affix2(Affix2),
28 m_Margin1(Margin1), m_Margin2(Margin2),
29 m_Shift1(), m_Shift2(),
30 m_Status(Affix1 == Affix2 ? IDENTICAL : DIFFERENT),
31 m_Agreement_unigram(0.0),
32 m_Agreement_bigram(0.0),
33 m_Disagreement_unigram(0.0),
34 m_Disagreement_bigram(0.0) { }
36 SignatureAlignment::SignatureAlignment(CSignature* Sig1, CSignature* Sig2)
37 : m_SigPtr1(Sig1), m_SigPtr2(Sig2),
38 m_AffixAlignments(),
39 m_Sig1(Sig1), m_Sig2(Sig2),
40 m_Sig1AlignedAffixes(), m_Sig2AlignedAffixes() { }
42 void SignatureAlignment::FindBestAlignment()
44 QString Morph, OtherMorph, MarginPiece;
45 AffixAlignment* pAlign;
46 CParse Margins1, Margins2, Suffixes1, Suffixes2;
47 int i, j, morphlength, morphlength2;
48 CSS ssMorph, ssOtherMorph;
50 //-------------------------------- Step 1 --------------------------------------------------------//
51 // first, look for identical affixes in the sigs;
52 for (i = 1; i <= m_Sig1.Size(); i++)
54 Morph = m_Sig1.GetPiece(i).Display();
55 for ( j = 1; j <= m_Sig2.Size(); j++)
57 OtherMorph = m_Sig2.GetPiece(j).Display();
58 if (OtherMorph == Morph)
60 pAlign = new AffixAlignment (Morph, Morph);
61 m_AffixAlignments.append( pAlign );
62 if (!m_Sig1AlignedAffixes.Contains( Morph )) m_Sig1AlignedAffixes. Append ( Morph );
63 if (!m_Sig2AlignedAffixes.Contains( Morph )) m_Sig2AlignedAffixes. Append ( Morph );
64 break;
70 //-------------------------------- Step 2 --------------------------------------------------------//
71 // now look for non-identical but end-matching affix pairs, like "es/s"
73 for ( i = 1; i <= m_Sig1.Size(); i++)
75 ssMorph = m_Sig1.GetPiece(i);
76 morphlength = ssMorph.GetLength();
78 for ( j = 1; j <= m_Sig2.Size(); j++)
80 ssOtherMorph = m_Sig2.GetPiece(j);
81 morphlength2 = ssOtherMorph.GetLength();
82 if ( morphlength2 == morphlength ) { continue; }
84 if ( ssOtherMorph.Right( morphlength ) == ssMorph )
86 MarginPiece = ssOtherMorph.Left(morphlength2 - morphlength).Display();
87 pAlign = new AffixAlignment (TheStringNULL, ssMorph.Display(),
88 MarginPiece, ssMorph.Display() );
89 m_AffixAlignments.append(pAlign);
90 Margins1. Append (TheStringNULL);
91 Margins2. Append (MarginPiece);
92 Suffixes1.Append (ssMorph);
93 Suffixes2.Append (ssMorph);
94 if (!m_Sig1AlignedAffixes.Contains( ssMorph )) m_Sig1AlignedAffixes. Append ( ssMorph );
95 if (!m_Sig2AlignedAffixes.Contains( ssOtherMorph )) m_Sig2AlignedAffixes. Append ( ssOtherMorph );
97 if ( ssMorph.Right(morphlength2) == ssOtherMorph )
99 MarginPiece = ssMorph.Left( morphlength - morphlength2 ).Display();
100 pAlign = new AffixAlignment ( MarginPiece, ssOtherMorph.Display(),
101 TheStringNULL, ssOtherMorph.Display() );
102 m_AffixAlignments.append(pAlign);
103 Margins1. Append (MarginPiece);
104 if (!Margins2.ContainsNULL()) Margins2. Append (TheStringNULL);
105 Suffixes1.Append (ssOtherMorph);
106 Suffixes2.Append (ssOtherMorph);
107 if (!m_Sig1AlignedAffixes.Contains( ssMorph )) m_Sig1AlignedAffixes. Append ( ssMorph );
108 if (!m_Sig2AlignedAffixes.Contains( ssOtherMorph )) m_Sig2AlignedAffixes. Append ( ssOtherMorph );
113 //-------------------------------- Step 3 --------------------------------------------------------//
114 // if one of the signatures has an X in its margin region, and it also has an X as an affix and the other
115 // signature has a NULL, then we can align the X and the NULL:
117 if (m_Sig1.ContainsNULL())
119 for ( i = 1; i <= m_Sig2.Size(); i++)
121 Morph = m_Sig2.GetPiece (i).Display() ;
122 if (Morph == TheStringNULL) continue;
123 if ( Margins2.Contains( Morph ) )
125 pAlign = new AffixAlignment ( TheStringNULL, TheStringNULL,
126 Morph, TheStringNULL);
127 m_AffixAlignments.append(pAlign);
129 if (!Margins1. ContainsNULL()) Margins1. Append (TheStringNULL);
130 if (!Suffixes1.ContainsNULL()) Suffixes1.Append (TheStringNULL);
131 if (!Suffixes2.ContainsNULL()) Suffixes2.Append (TheStringNULL);
132 if (!m_Sig1AlignedAffixes.ContainsNULL() ) m_Sig1AlignedAffixes. Append ( TheStringNULL );
133 if (!m_Sig2AlignedAffixes.Contains( Morph ) ) m_Sig2AlignedAffixes. Append ( Morph );
139 if (m_Sig2.ContainsNULL())
141 for ( i = 1; i <= m_Sig1.Size(); i++)
143 if ( m_Sig1.GetPiece(i).IsNULL() ) continue;
144 Morph = m_Sig1.GetPiece (i).Display() ;
145 if ( Margins1.Contains( Morph ) )
147 pAlign = new AffixAlignment ( Morph, TheStringNULL,
148 TheStringNULL, TheStringNULL);
149 m_AffixAlignments.append(pAlign);
151 Margins2.Append(TheStringNULL);
152 Suffixes1.Append (TheStringNULL);
153 Suffixes2.Append(TheStringNULL);
154 if (!m_Sig2AlignedAffixes.ContainsNULL() ) m_Sig2AlignedAffixes. Append ( TheStringNULL );
155 if (!m_Sig1AlignedAffixes.Contains( Morph ) ) m_Sig1AlignedAffixes. Append ( Morph );
// Writes this signature alignment to LogStream as markup: the two signatures
// (each rendered with Display('-')) separated by "<br>", then a table with one
// row per entry of m_AffixAlignments.
// NOTE(review): the block delimiters of this function are not visible in this
// view, so it cannot be confirmed here whether the final EndTable is written
// only when there is at least one alignment — check against the real file.
163 void SignatureAlignment::Display(QTextStream& LogStream)
165 AffixAlignment* pAlign;
// The heading and table header are only produced when something was aligned.
167 if (m_AffixAlignments.count() > 0)
169 LogStream <<
170 m_Sig1.Display('-') << "<br>" << m_Sig2.Display('-') <<
171 StartTable <<
172 StartTableRow <<
173 MakeTableHeader ("Shift region 1") <<
174 MakeTableHeader ("Margin region 1") <<
175 MakeTableHeader ("Affix 1") <<
176 MakeTableHeader ("Shift region 2") <<
177 MakeTableHeader ("Margin region 2") <<
178 MakeTableHeader ("Affix 2") <<
179 EndTableRow << endl;
// One table row per alignment; the columns follow the header order above.
181 for (int i = 0; i < m_AffixAlignments.size(); i++)
183 pAlign = m_AffixAlignments.at(i);
184 LogStream <<
185 StartTableRow <<
186 TableData( pAlign->GetShift1() ) <<
187 TableData( pAlign->GetMargin1()) <<
188 TableData( pAlign->GetAffix1() ) <<
189 TableData( pAlign->GetShift2() ) <<
190 TableData( pAlign->GetMargin2()) <<
191 TableData( pAlign->GetAffix2() ) <<
192 EndTableRow << endl;
// Closes the table opened with StartTable above.
197 LogStream << EndTable;