HowManyAreAnalyzed(): use status_user_agent to report progress
[linguistica.git] / StringEditGrid.cpp
blob4b987a364411f2c31be2eb69b33bfd217815928b
1 // implementation of CStringEditGrid methods
2 // Copyright © 2009 The University of Chicago
3 #include "StringEditGrid.h"
4 #include <algorithm>
5 #include "Alignment.h"
6 #include "Parse.h"
7 #include "StringFunc.h"
9 // construction/destruction.
11 namespace {
12 void copy_cstr(const QChar* src, const std::size_t len, QChar* dest)
14 std::copy(&src[0], &src[len], &dest[0]);
15 dest[len] = QChar();
17 QChar* new_cstr(const QChar* src, const std::size_t len)
19 QChar* dest = new QChar[len + 1];
20 copy_cstr(src, len, dest);
21 return dest;
24 CStringEditGrid::CStringEditGrid(CAlignment* pAlign)
25 : m_Length1(pAlign->m_Length1),
26 m_String1(new_cstr(pAlign->m_Str1->GetKeyPointer(), m_Length1)),
27 m_Length2(pAlign->m_Length2),
28 m_String2(new_cstr(pAlign->m_Str2->GetKeyPointer(), m_Length2)),
29 m_DeletionCost(1.0),
30 m_SubstitutionCost(1.5),
31 m_Scores(new float[m_Length1 * m_Length2]), // left uninitialized
32 // left uninitialized
33 m_GridEntries(new StringEditGridEntry*[m_Length1 * m_Length2]) { }
35 CStringEditGrid::CStringEditGrid(const QString String1, const QString String2)
36 : m_Length1(String1.size()),
37 m_String1(new_cstr(String1.data(), m_Length1)),
38 m_Length2(String2.size()),
39 m_String2(new_cstr(String2.data(), m_Length2)),
40 m_DeletionCost(1.0),
41 m_SubstitutionCost(1.5),
42 m_Scores(new float[m_Length1 * m_Length2]), // left uninitialized
43 // left uninitialized
44 m_GridEntries(new StringEditGridEntry*[m_Length1 * m_Length2])
45 { Q_ASSERT(!String1.isEmpty() && !String2.isEmpty()); }
47 CStringEditGrid::~CStringEditGrid()
49 for (int i = 0; i < m_Length1 * m_Length2; ++i)
50 delete m_GridEntries[i];
51 delete[] m_GridEntries;
52 delete[] m_Scores;
54 delete[] m_String2;
55 delete[] m_String1;
58 float CStringEditGrid::GetScore(int row, int col)
60 if (row >= m_Length2 || col >= m_Length1 ) { return -1; }
62 return ( m_Scores [ row * m_Length1 + col ] );
66 void CStringEditGrid::SetScore(int row, int col, float value)
68 if (row >= m_Length2 || col >= m_Length1 ) { return ; }
70 m_Scores [ row * m_Length1 + col ] = value;
76 CAlignment& CStringEditGrid::GetAlignment(CAlignment& Alignment)
78 Alignment.m_Identities = 0;
80 if ( Alignment.m_Match1 ) { delete Alignment.m_Match1; }
81 if ( Alignment.m_Match2 ) { delete Alignment.m_Match2; }
83 Alignment.m_Match1 = new int [ m_Length1 ];
84 Alignment.m_Match2 = new int [ m_Length2 ];
85 Alignment.m_Slips = 0;
86 Alignment.m_Spans = 1;
88 StringEditGridEntry* pEntry;
89 int row = m_Length2 - 1;
90 int col = m_Length1 - 1;
91 eAlignmentOperation PreviousMatch = End;
93 // we're going to move backwards, down the grid from the upper right-hand corner
94 // to the lower left-hand corner.
95 // You have to understand string-edit grids to follow what's going on here!
98 pEntry = GetStringEditGridEntry (row, col);
100 while ( pEntry->m_Movement != Start )
102 switch (pEntry->m_Movement)
104 case FromBelow:
106 // we added an element from String2 not linked to String1
107 if ( PreviousMatch == IdenticalMatch ||
108 PreviousMatch == End
111 Alignment.m_Slips++;
113 if ( PreviousMatch == IdenticalMatch )
115 Alignment.m_Spans++;
116 Alignment.m_Str1->CutRightBeforeHere(col+1);
117 Alignment.m_Str2->CutRightBeforeHere(row+1);
121 Alignment.m_Match2[row] = -1;
122 row--;
123 PreviousMatch = FromBelow;
124 pEntry = GetStringEditGridEntry(row, col);
125 break;
127 case FromLeft:
129 // we added an element from String1 not linked to String2
130 if ( PreviousMatch == IdenticalMatch ||
131 PreviousMatch == End
134 Alignment.m_Slips++;
136 if ( PreviousMatch == IdenticalMatch )
138 Alignment.m_Spans++;
139 Alignment.m_Str1->CutRightBeforeHere (col+1);
140 Alignment.m_Str2->CutRightBeforeHere (row+1);
143 Alignment.m_Match1[col] = -1;
144 col--;
145 PreviousMatch = FromLeft;
146 pEntry = GetStringEditGridEntry (row, col);
147 break;
149 case FromBelowAndLeft:
150 { // we made a matching alignment
152 Alignment.m_Match1[col] = row;
153 Alignment.m_Match2[row] = col;
155 if (m_String1[col] == m_String2[row] )
157 if ( PreviousMatch != IdenticalMatch &&
158 PreviousMatch != End )
160 Alignment.m_Spans++;
161 Alignment.m_Str1->CutRightBeforeHere(col+1);
162 Alignment.m_Str2->CutRightBeforeHere(row+1);
164 PreviousMatch = IdenticalMatch;
165 Alignment.m_Identities++;
167 else // non-identity match
169 if ( PreviousMatch == IdenticalMatch || // april 2003
170 PreviousMatch == End
173 Alignment.m_Slips++;
175 if ( PreviousMatch == IdenticalMatch ) // april 2003
177 Alignment.m_Spans++;
178 Alignment.m_Str1->CutRightBeforeHere(col+1);
179 Alignment.m_Str2->CutRightBeforeHere(row+1);
181 PreviousMatch = NonIdenticalMatch;
183 col--;
184 row--;
185 pEntry = GetStringEditGridEntry (row, col);
186 break;
187 // yuhuask ? Not very clear about the diff between slips and spans
190 default: break;
194 Q_ASSERT (row == 0);
195 Q_ASSERT (col == 0);
196 Q_ASSERT (Alignment.m_Str1->GetChar(0) == '#');
197 Alignment.m_Match1[0] = 0;
198 Alignment.m_Match2[0] = 0;
200 return Alignment;
203 StringEditGridEntry* CStringEditGrid::GetStringEditGridEntry(int row, int col)
205 if (row >= m_Length2 || col >= m_Length1 )
207 return NULL;
209 return m_GridEntries [ row * m_Length1 + col ] ;
214 void CStringEditGrid::SetStringEditGridEntry (int row, int col, StringEditGridEntry* pAlign)
216 if (row >= m_Length2 || col >= m_Length1 )
218 return ;
220 m_GridEntries [ row * m_Length1 + col ] = pAlign;
225 float CStringEditGrid::FindBestAlignment(CAlignment& Alignment)
230 float ScoreFromBelow,
231 ScoreDiagonal,
232 ScoreFromLeft;
233 int Winner = 0;
234 float BestScore = 0.0;
235 StringEditGridEntry* pEntry;
236 float Cost ;
238 for (int col = 0; col < m_Length1; col++)
240 for (int row = 0; row < m_Length2; row++) // row number
243 if (col == 0 && row == 0)
245 pEntry = new StringEditGridEntry;
246 pEntry->m_Movement = Start;
247 pEntry->m_Slips = 0;
248 SetStringEditGridEntry (0,0, pEntry);
249 SetScore(0,0, 0 );
250 continue;
253 if ( col == 0) { ScoreFromLeft = -1; }
254 else
256 ScoreFromLeft = GetScore( row, col-1) + m_DeletionCost;
259 if (row == 0) { ScoreFromBelow = -1; }
260 else
262 ScoreFromBelow = GetScore( row - 1, col ) + m_DeletionCost;
265 if (row == 0 || col == 0) { ScoreDiagonal = -1; }
266 else
268 Cost = GetCost( m_String1[col], m_String2[row] );
269 ScoreDiagonal = GetScore (row-1, col-1 ) + Cost;
271 //-------------------------------------------------
272 // Now find the lowest score that isn't negative
273 //--------------------------------------------------
274 if (ScoreFromLeft >= 0)
276 Winner = 0;
277 BestScore = ScoreFromLeft;
279 else if (ScoreFromBelow >= 0 )
281 Winner = 1;
282 BestScore = ScoreFromBelow;
284 else if (ScoreDiagonal >= 0 )
286 Winner = 2;
287 BestScore = ScoreDiagonal;
289 else { Q_ASSERT (FALSE); }
291 if (Winner == 0)
293 if ( ScoreFromBelow >= 0 && ScoreFromBelow < ScoreFromLeft )
295 Winner = 1;
296 BestScore = ScoreFromBelow;
299 if (Winner == 0 || Winner == 1)
301 if ( ScoreDiagonal >= 0 && ScoreDiagonal < BestScore )
303 Winner = 2;
304 BestScore = ScoreDiagonal;
307 //--------------------------------------------------
308 // Use the conclusions...
309 //--------------------------------------------------
311 if (Winner == 0) // arrow from left
314 pEntry = new StringEditGridEntry;
315 SetStringEditGridEntry (row, col, pEntry);
316 pEntry->m_Movement = FromLeft;
317 // qEntry = GetStringEditGridEntry (row, col-1);
320 if (Winner == 1) // arrow from below
323 pEntry = new StringEditGridEntry;
324 SetStringEditGridEntry (row, col, pEntry);
325 pEntry->m_Movement = FromBelow;
326 // qEntry = GetStringEditGridEntry (row-1, col);
330 if (Winner == 2) // up the diagonal
333 pEntry = new StringEditGridEntry;
334 SetStringEditGridEntry (row, col, pEntry);
335 pEntry->m_Movement = FromBelowAndLeft;
336 // qEntry = GetStringEditGridEntry (row-1, col-1);
339 SetScore (row, col, BestScore);
345 GetAlignment (Alignment );
346 Alignment.m_Score = BestScore;
348 return BestScore;
349 // return GetScore (m_Length2 - 1, m_Length1 - 1); //why was this here? JG
356 float CStringEditGrid::GetCost(QChar c1, QChar c2)
359 if ( c1 == c2 ) { return 0; }
360 else { return m_SubstitutionCost; }