HowManyAreAnalyzed(): use status_user_agent to report progress
[linguistica.git] / Alignment.cpp
blobc05e6bb6d5794e39adc79e2ded664a359f3f24c9
1 // Implementation of CAlignment methods
2 // Copyright © 2009 The University of Chicago
3 #include "Alignment.h"
4 #include "StringEditGrid.h"
5 #include "StringSurrogate.h"
6 #include "Parse.h"
8 // construction/destruction.
10 CAlignment::CAlignment(const QString String1, const QString String2)
11 : m_Str1(new CParse(CStringSurrogate(QString('#') + String1))),
12 m_Str2(new CParse(CStringSurrogate(QString('#') + String2))),
13 m_Length1(String1.size() + 1),
14 m_Length2(String2.size() + 1),
15 m_Match1(new int[m_Length1]), // filled below
16 m_Match2(new int[m_Length2]), // filled below
17 m_Score(0.0),
18 m_Slips(0),
19 m_Spans(1),
20 m_Identities(0)
22 std::fill(&m_Match1[0], &m_Match1[m_Length1], -5);
23 std::fill(&m_Match2[0], &m_Match2[m_Length2], -5);
26 CAlignment::CAlignment(CParse* Parse1, CParse* Parse2)
27 : m_Str1(), // initialized below
28 m_Str2(), // initialized below
29 m_Length1(Parse1->GetKeyLength() + 1),
30 m_Length2(Parse2->GetKeyLength() + 1),
31 m_Match1(new int[m_Length1]), // filled below
32 m_Match2(new int[m_Length2]), // filled below
33 m_Score(0.0),
34 m_Slips(0),
35 m_Spans(1),
36 m_Identities(0)
39 m_Str1 = new CParse(QChar('#'));
40 m_Str1->Append(*Parse1);
41 m_Str1->SimplifyParseStructure();
44 m_Str2 = new CParse(QChar('#'));
45 m_Str2->Append(*Parse2);
46 m_Str2->SimplifyParseStructure();
48 std::fill(&m_Match1[0], &m_Match1[m_Length1], -5);
49 std::fill(&m_Match2[0], &m_Match2[m_Length2], -5);
52 CAlignment::CAlignment(const CAlignment* other)
53 : m_Str1(new CParse(*other->m_Str1)),
54 m_Str2(new CParse(*other->m_Str2)),
55 m_Length1(other->m_Length1),
56 m_Length2(other->m_Length2),
57 m_Match1(new int[m_Length1]), // filled below
58 m_Match2(new int[m_Length2]), // filled below
59 m_Score(other->m_Score),
60 m_Slips(other->m_Slips),
61 m_Spans(other->m_Spans),
62 m_Identities(other->m_Identities)
64 std::copy(&other->m_Match1[0], &other->m_Match1[m_Length1],
65 &m_Match1[0]);
66 std::copy(&other->m_Match2[0], &other->m_Match2[m_Length2],
67 &m_Match2[0]);
70 CAlignment::CAlignment(const CAlignment& other)
71 : m_Str1(new CParse(*other.m_Str1)),
72 m_Str2(new CParse(*other.m_Str2)),
73 m_Length1(other.m_Length1),
74 m_Length2(other.m_Length2),
75 m_Match1(new int[m_Length1]), // filled below
76 m_Match2(new int[m_Length2]), // filled below
77 m_Score(other.m_Score),
78 m_Slips(other.m_Slips),
79 m_Spans(other.m_Spans),
80 m_Identities(other.m_Identities)
82 std::copy(&other.m_Match1[0], &other.m_Match1[m_Length1],
83 &m_Match1[0]);
84 std::copy(&other.m_Match2[0], &other.m_Match2[m_Length2],
85 &m_Match2[0]);
88 CAlignment::~CAlignment()
90 delete[] m_Match2;
91 delete[] m_Match1;
92 delete m_Str2;
93 delete m_Str1;
96 CAlignment& CAlignment::operator=(const CAlignment& other)
98 *m_Str1 = *other.m_Str1;
99 *m_Str2 = *other.m_Str2;
100 m_Length1 = other.m_Length1;
101 m_Length2 = other.m_Length2;
102 std::copy(&other.m_Match1[0], &other.m_Match1[m_Length1],
103 &m_Match1[0]);
104 std::copy(&other.m_Match2[0], &other.m_Match2[m_Length2],
105 &m_Match2[0]);
106 m_Score = other.m_Score;
107 m_Slips = other.m_Slips;
108 m_Spans = other.m_Spans;
109 m_Identities = other.m_Identities;
110 return *this;
113 bool CAlignment::String1CharMatches(int n)// true if the char is aligned and identical to matchee
116 if ( m_Match1[n] == -1 ) return FALSE;
117 if ( m_Str1->GetChar(n) == m_Str2->GetChar( m_Match1[ n ] ) )
119 return TRUE;
122 return FALSE;
126 bool CAlignment::String2CharMatches(int n)// true if the char is aligned and identical to matchee
129 if ( m_Match2[n] == -1 ) return FALSE;
130 if ( m_Str2->GetChar(n) == m_Str1->GetChar( m_Match2[ n ] ) )
132 return TRUE;
135 return FALSE;
138 bool CAlignment::PerfectMatch (int n, int m )// true of these two chars are identical and aligned
141 if ( m_Match1[n] == m &&
142 m_Match2[m] == n &&
143 m_Str1->GetChar(n) == m_Str2->GetChar(m)
146 return TRUE;
149 else
150 return FALSE;
154 CParse CAlignment::CalculateDisplay()
157 CParse Return;
159 UINT index = 0; // on the page
160 int locTop = 1, locBottom = 1; // because 0's are for the word boundary symbols #
161 int State= 0;
162 char Top [50], Bottom[50], Middle[50];
163 CParse Return;
164 CParse TopPiece, BottomPiece;
166 0 Both match
167 1 Just top
168 2 Just bottom
169 3 both no match
174 while ( locTop < m_Length1 || locBottom < m_Length2 )
176 switch (State)
178 case 0: // Both match
180 while ( PerfectMatch (locTop, locBottom) &&
181 locTop < m_Length1
184 Top [ index ] = m_Str1->GetChar( locTop++ );
185 Bottom [ index ] = m_Str2->GetChar( locBottom++ );
186 Middle [ index ] = '|';
187 index++;
189 Top [ index ] = ' ';
190 Bottom [ index ] = ' ';
191 Middle [ index ] = ' ';
192 index++;
195 if ( locTop == m_Length1
196 && locBottom == m_Length2 )
197 break;
198 //----------------------------------------------//
201 if ( locTop == m_Length1 )
203 State = 2;
204 break;
206 if ( locBottom == m_Length2 )
208 State = 1;
209 break;
212 if (m_Match1 [ locTop ] == locBottom
214 m_Match2 [ locBottom ] == locTop
217 State = 3;
219 else if ( m_Match1 [ locTop ] == -1 )
221 State = 1;
223 else
225 State = 2;
227 break;
231 case 1:
233 while ( m_Match1 [ locTop ] == -1 )
235 TopPiece.Append( m_Str1->GetChar( locTop ) );
237 Top [ index ] = m_Str1->GetChar( locTop++ );
238 Bottom [ index ] = ' ';
239 Middle [ index ] = ' ';
240 index++;
242 Top [ index] = ' ';
243 Bottom [ index ] = ' ';
244 Middle [ index] = ' ';
245 index++;
248 TopPiece.Append (' ');
249 //-----------------------------------------------//
250 if ( PerfectMatch (locTop, locBottom) )
252 State = 0;
254 else if (m_Match1 [ locTop ] == locBottom
256 m_Match2 [ locBottom ] == locTop
259 State = 3;
261 else
263 State = 2;
265 break;
267 case 2:
269 while ( m_Match2 [ locBottom ] == -1 )
271 Top [ index ] = ' ';
272 Bottom [ index ] = m_Str2->GetChar( locBottom++ );
273 Middle [ index ] = ' ';
274 index++;
276 Top [ index] = ' ';
277 Bottom [ index ] = ' ';
278 Middle [ index] = ' ';
279 index++;
281 //-----------------------------------------------//
282 if ( PerfectMatch (locTop, locBottom) )
284 State = 0;
287 else if (m_Match1 [ locTop ] == locBottom
289 m_Match2 [ locBottom ] == locTop
292 State = 3;
294 break;
297 case 3: // Linked but not a match
299 while ( m_Match1 [ locTop ] == locBottom
301 m_Match2 [ locBottom ] == locTop
303 m_Str1->GetChar(locTop ) != m_Str2->GetChar( locBottom )
306 Top [ index] = m_Str1->GetChar( locTop++ );
307 Bottom [ index ] = m_Str2->GetChar( locBottom++ );
308 Middle [ index] = '#';
309 index++;
312 Top [ index] = ' ';
313 Bottom [ index ] = ' ';
314 Middle [ index] = ' ';
315 index++;
318 //----------------------------------------------//
319 if (m_Match1 [ locTop ] == locBottom
321 m_Match2 [ locBottom ] == locTop
324 State = 0;
326 else if ( m_Match1 [ locTop ] == -1 )
328 State = 1;
330 else
332 State = 2;
334 break;
335 } // end of case
351 Top [ index ] = '\0';
352 Middle [ index ] = '\0';
353 Bottom [index ] = '\0';
355 Return.Append('\n');
356 Return.Append(Top);
357 Return.Append('\n');
358 Return.Append(Middle);
359 Return.Append('\n');
360 Return.Append(Bottom);
361 Return.Append('\n');
362 Return.Append ("Slips: " );
363 Return.Append( m_Slips);
364 Return.Append ("Spans: " );
365 Return.Append( m_Spans);
366 return Return;
369 return Return;
373 CParse CAlignment::FindSubstitution()
374 // this assumes that there is only one part of the alignment
375 // where the two words disagree; and it returns the two
376 // pieces that disagree with each other.
378 CParse Return;
379 int Str1Match = 0, Str2Match = 0;
381 if ( m_Slips != 1 ) { return Return; }
383 int piece;
384 for (piece = 1; piece <= m_Str1->Size(); piece++)
386 Str2Match = Str2MatchForStr1Piece (piece);
387 if ( Str2Match <= 0 || m_Str1->GetPiece(piece) != m_Str2->GetPiece(Str2Match) )
389 Return.Append( m_Str1->GetPiece( piece ) ) ;
393 if ( Return.Size() == 0)
395 Return = CParse(CStringSurrogate(QString("NULL").unicode(), 0, 4));
397 //---------------------------------------------------------------//
398 for (piece = 1; piece <= m_Str2->Size(); piece++)
400 Str1Match = Str1MatchForStr2Piece (piece);
401 if ( Str1Match <= 0 || m_Str2->GetPiece(piece) != m_Str1->GetPiece(Str1Match) )
403 Return.Append( m_Str2->GetPiece( piece ) ) ;
407 if ( Return.Size() == 1)
409 Return.Append( CStringSurrogate(QString("NULL").unicode(), 0, 4));
412 return Return;
416 CParse CAlignment::SpellOut()
419 CParse Return ( *m_Str1 );
420 Return.Append ( *m_Str2);
422 return Return;
427 CParse CAlignment::FindContext()
429 /* This returns a parse consisting of 2 or 3 pieces; each piece is either something shared by
430 both strings of the alignment, or else it's an underscore, representing the difference(s)
431 between the string. */
434 CParse Return;
436 int loc1=1, loc2=1;
437 int StartLoc1 = 0, StartLoc2 = 0;
438 int State; // 1=matching, 2= not matching
440 if (m_Spans > 3 ) { return Return; }
443 if ( PerfectMatch (1,1) )
445 State = 1; // matching
447 else
449 State = 2; // not matching
452 while ( loc1 < m_Length1 || loc2 < m_Length2 )
454 if (State == 1)
456 while ( loc1 < m_Length1 && loc2 < m_Length2 && PerfectMatch (loc1, loc2) )
458 loc1++;
459 loc2++;
462 CStringSurrogate Piece1 ( m_Str1->GetKeyPointer(), StartLoc1, loc1 - StartLoc1 );
463 CStringSurrogate Piece2 ( m_Str2->GetKeyPointer(), StartLoc2, loc2 - StartLoc2 );
465 if ( Piece1.GetLength () == 0 ) { Return.Append (CStringSurrogate (QString("NULL").unicode(), 0, 4 ) ); }
466 else { Return.Append ( Piece1 ); }
467 // yuhuask Could be ?
469 StartLoc1 = loc1 ;
470 StartLoc2 = loc2 ;
471 State = 2;
473 else
475 while ( loc1 < m_Length1 && ! String1CharMatches (loc1) )
478 loc1++;
480 while ( loc2 < m_Length2 && !String2CharMatches (loc2) )
482 loc2++;
485 // yuhuask Here, loc2 == match1[Loc1] or Loc2=m_Length2&&Loc1=m_Lenght1
487 Return.Append(QChar('_'));
489 StartLoc1 = loc1 ;
490 StartLoc2 = loc2 ;
491 State = 1;
497 return Return;
503 double CAlignment::FindStringEditDistance()
505 CStringEditGrid Grid (this);
506 return Grid.FindBestAlignment ( *this );
511 /// piece number of the piece that corresponds to piece n, or 0 if none
512 int CAlignment::Str2MatchForStr1Piece(int n)
514 if (n < 1 || n > m_Str1->Size())
515 return 0;
517 int Str1Spot = m_Str1->GetPositionOfFirstCharOfThisPiece( n );
518 int Str2Spot = m_Match1 [ Str1Spot ];
520 if ( Str2Spot < 0 ) { return 0; }
522 int Str2Piece = -1;
523 for (int i = 1; i <= m_Str2->Size(); i++)
525 if ( m_Str2->GetPositionOfFirstCharOfThisPiece (i) == Str2Spot )
527 Str2Piece = i;
528 break;
531 if ( Str2Piece < 0 ) { return -1;}
535 return Str2Piece;
539 /// piece number of the piece that corresponds to piece n, or 0 if none
540 int CAlignment::Str1MatchForStr2Piece(int n)
542 if (n < 1 || n > m_Str2->Size())
543 return 0;
545 int Str2Spot = m_Str2->GetPositionOfFirstCharOfThisPiece( n );
546 int Str1Spot = m_Match2 [ Str2Spot ];
548 if ( Str1Spot < 0 ) { return 0; }
550 int Str1Piece = -1;
551 for (int i = 1; i <= m_Str1->Size(); i++)
553 if ( m_Str1->GetPositionOfFirstCharOfThisPiece (i) == Str1Spot )
555 Str1Piece = i;
556 break;
559 if ( Str1Piece < 0 ) { return -1;}
562 return Str1Piece;