Fixed issue #2175: TortoiseGitBlame fails to search if line has non-ascii chars and...
[TortoiseGit.git] / src / TortoiseGitBlame / TortoiseGitBlameData.cpp
blobd8a2c3218083bfe03505e3666013a2b79aa93501
1 // TortoiseGitBlame - a Viewer for Git Blames
3 // Copyright (C) 2008-2013 - TortoiseGit
4 // Copyright (C) 2010-2013 Sven Strickroth <email@cs-ware.de>
5 // Copyright (C) 2003 Don HO <donho@altern.org>
7 // This program is free software; you can redistribute it and/or
8 // modify it under the terms of the GNU General Public License
9 // as published by the Free Software Foundation; either version 2
10 // of the License, or (at your option) any later version.
12 // This program is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
17 // You should have received a copy of the GNU General Public License
18 // along with this program; if not, write to the Free Software Foundation,
19 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 // CTortoiseGitBlameData.cpp : implementation of the CTortoiseGitBlameData class
24 #include "stdafx.h"
25 #include "TortoiseGitBlame.h"
26 #include "CommonAppUtils.h"
27 #include "TortoiseGitBlameDoc.h"
28 #include "TortoiseGitBlameData.h"
29 #include "MainFrm.h"
30 #include "EditGotoDlg.h"
31 #include "LoglistUtils.h"
32 #include "FileTextLines.h"
33 #include "UnicodeUtils.h"
34 #include "MenuEncode.h"
35 #include "gitdll.h"
36 #include "SysInfo.h"
37 #include "StringUtils.h"
38 #include "TGitPath.h"
/**
 * Moves the upper bits of a wide character down by eight and the lower bits
 * up by eight, then combines both halves.
 * With a 16-bit wchar_t (as on Windows) this exchanges the two bytes of the
 * code unit, i.e. converts between big-endian and little-endian UTF-16.
 *
 * @param nValue code unit to convert
 * @return the code unit with its two bytes exchanged
 */
wchar_t WideCharSwap2(wchar_t nValue)
{
	const auto movedDown = nValue >> 8;
	const auto movedUp = nValue << 8;
	return static_cast<wchar_t>(movedDown | movedUp);
}
45 // CTortoiseGitBlameData construction/destruction
47 CTortoiseGitBlameData::CTortoiseGitBlameData()
49 m_encode = -1;
52 CTortoiseGitBlameData::~CTortoiseGitBlameData()
56 int CTortoiseGitBlameData::GetEncode(unsigned char *buff, int size, int *bomoffset)
58 CFileTextLines textlines;
59 CFileTextLines::UnicodeType type = textlines.CheckUnicodeType(buff, size);
61 if (type == CFileTextLines::UTF8BOM)
63 *bomoffset = 3;
64 return CP_UTF8;
66 if (type == CFileTextLines::UTF8)
67 return CP_UTF8;
69 if (type == CFileTextLines::UTF16_LE)
70 return 1200;
71 if (type == CFileTextLines::UTF16_LEBOM)
73 *bomoffset = 2;
74 return 1200;
77 if (type == CFileTextLines::UTF16_BE)
78 return 1201;
79 if (type == CFileTextLines::UTF16_BEBOM)
81 *bomoffset = 2;
82 return 1201;
85 return GetACP();
88 int CTortoiseGitBlameData::GetEncode(int *bomoffset)
90 int encoding = 0;
91 BYTE_VECTOR rawAll;
92 for (auto it = m_RawLines.begin(), it_end = m_RawLines.end(); it != it_end; ++it)
94 rawAll.append(&(*it)[0], it->size());
96 encoding = GetEncode(&rawAll[0], (int)rawAll.size(), bomoffset);
97 return encoding;
100 void CTortoiseGitBlameData::ParseBlameOutput(BYTE_VECTOR &data, CGitHashMap & HashToRev, DWORD dateFormat, bool bRelativeTimes)
102 std::map<CGitHash, CString> hashToFilename;
104 std::vector<CGitHash> hashes;
105 std::vector<int> originalLineNumbers;
106 std::vector<CString> filenames;
107 std::vector<BYTE_VECTOR> rawLines;
108 std::vector<CString> authors;
109 std::vector<CString> dates;
111 CGitHash hash;
112 int originalLineNumber = 0;
113 int finalLineNumber = 0;
114 int numberOfSubsequentLines = 0;
115 CString filename;
117 int pos = 0;
118 bool expectHash = true;
119 while (pos >= 0 && (size_t)pos < data.size())
121 if (data[pos] == 0)
122 continue;
124 int lineBegin = pos;
125 int lineEnd = data.findData((const BYTE*)"\n", 1, lineBegin);
126 if (lineEnd < 0)
127 lineEnd = (int)data.size();
129 if (lineEnd > lineBegin)
131 if (data[lineBegin] != '\t')
133 if (expectHash)
135 expectHash = false;
136 if (lineEnd - lineBegin > 40)
138 hash.ConvertFromStrA((char*)&data[lineBegin]);
140 int hashEnd = lineBegin + 40;
141 int originalLineNumberBegin = hashEnd + 1;
142 int originalLineNumberEnd = data.findData((const BYTE*)" ", 1, originalLineNumberBegin);
143 if (originalLineNumberEnd >= 0)
145 originalLineNumber = atoi(CStringA((LPCSTR)&data[originalLineNumberBegin], originalLineNumberEnd - originalLineNumberBegin));
146 int finalLineNumberBegin = originalLineNumberEnd + 1;
147 int finalLineNumberEnd = (numberOfSubsequentLines == 0) ? data.findData((const BYTE*)" ", 1, finalLineNumberBegin) : lineEnd;
148 if (finalLineNumberEnd >= 0)
150 finalLineNumber = atoi(CStringA((LPCSTR)&data[finalLineNumberBegin], finalLineNumberEnd - finalLineNumberBegin));
151 if (numberOfSubsequentLines == 0)
153 int numberOfSubsequentLinesBegin = finalLineNumberEnd + 1;
154 int numberOfSubsequentLinesEnd = lineEnd;
155 numberOfSubsequentLines = atoi(CStringA((LPCSTR)&data[numberOfSubsequentLinesBegin], numberOfSubsequentLinesEnd - numberOfSubsequentLinesBegin));
158 else
160 // parse error
161 finalLineNumber = 0;
162 numberOfSubsequentLines = 0;
165 else
167 // parse error
168 finalLineNumber = 0;
169 numberOfSubsequentLines = 0;
172 auto it = hashToFilename.find(hash);
173 if (it != hashToFilename.end())
174 filename = it->second;
175 else
176 filename.Empty();
178 else
180 // parse error
181 finalLineNumber = 0;
182 numberOfSubsequentLines = 0;
185 else
187 int tokenBegin = lineBegin;
188 int tokenEnd = data.findData((const BYTE*)" ", 1, tokenBegin);
189 if (tokenEnd >= 0)
191 if (!strncmp("filename", (const char*)&data[tokenBegin], tokenEnd - tokenBegin))
193 int filenameBegin = tokenEnd + 1;
194 int filenameEnd = lineEnd;
195 CStringA filenameA = CStringA((LPCSTR)&data[filenameBegin], filenameEnd - filenameBegin);
196 filename = UnquoteFilename(filenameA);
197 auto r = hashToFilename.insert(std::make_pair(hash, filename));
198 if (!r.second)
200 r.first->second = filename;
206 else
208 expectHash = true;
209 // remove <TAB> at start
210 BYTE_VECTOR line;
211 if (lineEnd - 1 > lineBegin)
212 line.append(&data[lineBegin + 1], lineEnd-lineBegin - 1);
214 hashes.push_back(hash);
215 filenames.push_back(filename);
216 originalLineNumbers.push_back(originalLineNumber);
217 rawLines.push_back(line);
218 --numberOfSubsequentLines;
221 pos = lineEnd + 1;
224 for (auto it = hashes.begin(), it_end = hashes.end(); it != it_end; ++it)
226 CGitHash hash = *it;
227 GitRev *pRev = GetRevForHash(HashToRev, hash);
228 if (pRev)
230 authors.push_back(pRev->GetAuthorName());
231 dates.push_back(CLoglistUtils::FormatDateAndTime(pRev->GetAuthorDate(), dateFormat, true, bRelativeTimes));
233 else
235 authors.push_back(CString());
236 dates.push_back(CString());
240 m_Hash.swap(hashes);
241 m_OriginalLineNumbers.swap(originalLineNumbers);
242 m_Filenames.swap(filenames);
243 m_RawLines.swap(rawLines);
245 m_Authors.swap(authors);
246 m_Dates.swap(dates);
247 // reset detected and applied encoding
248 m_encode = -1;
249 m_Utf8Lines.clear();
252 int CTortoiseGitBlameData::UpdateEncoding(int encode)
254 int encoding = encode;
255 int bomoffset = 0;
256 if (encoding==0)
258 BYTE_VECTOR all;
259 for (auto it = m_RawLines.begin(); it != m_RawLines.end(); ++it)
261 if (!it->empty())
262 all.append(&(*it)[0], it->size());
264 encoding = GetEncode(&all[0], (int)all.size(), &bomoffset);
267 if (encoding != m_encode)
269 m_encode = encoding;
271 m_Utf8Lines.resize(m_RawLines.size());
272 for (size_t i_Lines = 0; i_Lines < m_RawLines.size(); ++i_Lines)
274 const BYTE_VECTOR& rawLine = m_RawLines[i_Lines];
276 int bomoffset = 0;
277 CStringA lineUtf8;
278 lineUtf8.Empty();
280 if (!rawLine.empty())
282 if (encoding == 1201)
284 CString line;
285 int size = (int)((rawLine.size() - bomoffset)/2);
286 TCHAR *buffer = line.GetBuffer(size);
287 memcpy(buffer, &rawLine[bomoffset], sizeof(TCHAR)*size);
288 // swap the bytes to little-endian order to get proper strings in wchar_t format
289 wchar_t * pSwapBuf = buffer;
290 for (int i = 0; i < size; ++i)
292 *pSwapBuf = WideCharSwap2(*pSwapBuf);
293 ++pSwapBuf;
295 line.ReleaseBuffer();
297 lineUtf8 = CUnicodeUtils::GetUTF8(line);
299 else if (encoding == 1200)
301 CString line;
302 // the first bomoffset is 2, after that it's 1 (see issue #920)
303 // also: don't set bomoffset if called from Encodings menu (i.e. start == 42 and bomoffset == 0); bomoffset gets only set if autodetected
304 if (bomoffset == 0 && i_Lines != 0)
306 bomoffset = 1;
308 int size = (int)((rawLine.size() - bomoffset)/2);
309 TCHAR *buffer = line.GetBuffer(size);
310 memcpy(buffer, &rawLine[bomoffset], sizeof(TCHAR) * size);
311 line.ReleaseBuffer();
313 lineUtf8 = CUnicodeUtils::GetUTF8(line);
315 else if (encoding == CP_UTF8)
316 lineUtf8 = CStringA((LPCSTR)&rawLine[bomoffset], (int)(rawLine.size() - bomoffset));
317 else
319 CString line = CUnicodeUtils::GetUnicode(CStringA((LPCSTR)&rawLine[bomoffset], (int)(rawLine.size() - bomoffset)), encoding);
320 lineUtf8 = CUnicodeUtils::GetUTF8(line);
324 m_Utf8Lines[i_Lines] = lineUtf8;
325 bomoffset = 0;
328 return encoding;
331 int CTortoiseGitBlameData::FindNextLine(CGitHash& CommitHash, int line, bool bUpOrDown)
333 int startline = line;
334 bool findNoMatch = false;
335 while (line >= 0 && line < (int)m_Hash.size())
337 if (m_Hash[line] != CommitHash)
338 findNoMatch = true;
340 if (m_Hash[line] == CommitHash && findNoMatch)
342 if (line == startline + 2)
343 findNoMatch = false;
344 else
346 if (bUpOrDown)
347 line = FindFirstLineInBlock(CommitHash, line);
348 return line;
351 if (bUpOrDown)
352 --line;
353 else
354 ++line;
356 return -1;
359 static int FindAsciiLower(const CStringA &str, const CStringA &find)
361 if (find.GetLength() == 0)
362 return 0;
364 for (int i = 0; i < str.GetLength(); ++i)
366 char c = str[i];
367 c += (c >= 'A' && c <= 'Z') ? 32 : 0;
368 if (c == find[0])
370 bool diff = false;
371 int k = 1;
372 for (int j = i + 1; j < str.GetLength() && k < find.GetLength(); ++j, ++k)
374 char d = str[j];
375 d += (d >= 'A' && d <= 'Z') ? 32 : 0;
376 if (d != find[k])
378 diff = true;
379 break;
383 if (!diff && k == find.GetLength())
384 return i;
388 return -1;
391 static int FindUtf8Lower(const CStringA& strA, bool allAscii, const CString &findW, const CStringA &findA)
393 if (allAscii)
394 return FindAsciiLower(strA, findA);
396 CString strW = CUnicodeUtils::GetUnicode(strA);
397 return strW.MakeLower().Find(findW);
400 int CTortoiseGitBlameData::FindFirstLineWrapAround(const CString& what, int line, bool bCaseSensitive)
402 bool allAscii = true;
403 for (int i = 0; i < what.GetLength(); ++i)
405 if (what[i] > 0x7f)
407 allAscii = false;
408 break;
411 CString whatNormalized(what);
412 if (!bCaseSensitive)
414 whatNormalized.MakeLower();
417 CStringA whatNormalizedUtf8 = CUnicodeUtils::GetUTF8(whatNormalized);
419 bool bFound = false;
421 int i = line;
422 int numberOfLines = GetNumberOfLines();
423 if (line < 0 || line + 1 >= numberOfLines)
424 i = 0;
428 if (bCaseSensitive)
430 if (m_Authors[i].Find(whatNormalized) >= 0)
431 bFound = true;
432 else if (m_Utf8Lines[i].Find(whatNormalizedUtf8) >=0)
433 bFound = true;
435 else
437 if (CString(m_Authors[i]).MakeLower().Find(whatNormalized) >= 0)
438 bFound = true;
439 else if (FindUtf8Lower(m_Utf8Lines[i], allAscii, whatNormalized, whatNormalizedUtf8) >= 0)
440 bFound = true;
443 if(bFound)
445 break;
447 else
449 ++i;
450 if (i >= numberOfLines)
451 i = 0;
453 } while (i != line);
455 return bFound ? i : -1;
458 bool CTortoiseGitBlameData::ContainsOnlyFilename(const CString &filename) const
460 for (auto it = m_Filenames.cbegin(); it != m_Filenames.cend(); ++it)
462 if (filename != *it)
463 return false;
465 return true;
468 GitRev* CTortoiseGitBlameData::GetRevForHash(CGitHashMap & HashToRev, CGitHash& hash)
470 auto it = HashToRev.find(hash);
471 if (it == HashToRev.end())
473 GitRev rev;
474 rev.GetCommitFromHash(hash);
475 it = HashToRev.insert(std::make_pair(hash, rev)).first;
477 return &(it->second);
480 CString CTortoiseGitBlameData::UnquoteFilename(CStringA& s)
482 if (s[0] == '"')
484 CStringA ret;
485 int i_size = s.GetLength();
486 bool isEscaped = false;
487 for (int i = 1; i < i_size; ++i)
489 char c = s[i];
490 if (isEscaped)
492 if (c >= '0' && c <= '3')
494 if (i + 2 < i_size)
496 c = (((c - '0') & 03) << 6) | (((s[i + 1] - '0') & 07) << 3) | ((s[i + 2] - '0') & 07);
497 i += 2;
498 ret += c;
501 else
503 switch (c)
505 case 'a' : c = '\a'; break;
506 case 'b' : c = '\b'; break;
507 case 't' : c = '\t'; break;
508 case 'n' : c = '\n'; break;
509 case 'v' : c = '\v'; break;
510 case 'f' : c = '\f'; break;
511 case 'r' : c = '\r'; break;
513 ret += c;
515 isEscaped = false;
517 else
519 if (c == '\\')
521 isEscaped = true;
523 else if(c == '"')
525 break;
527 else
529 ret += c;
533 return CUnicodeUtils::GetUnicode(ret);
535 else
536 return CUnicodeUtils::GetUnicode(s);