Adjust for Scintilla 4.0.3 and fix warnings
[TortoiseGit.git] / src / TortoiseGitBlame / TortoiseGitBlameData.cpp
blobcda77f854ce825b9e0015e62b8c03cbcf7910382
1 // TortoiseGitBlame - a Viewer for Git Blames
3 // Copyright (C) 2008-2018 - TortoiseGit
4 // Copyright (C) 2003 Don HO <donho@altern.org>
6 // This program is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU General Public License
8 // as published by the Free Software Foundation; either version 2
9 // of the License, or (at your option) any later version.
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with this program; if not, write to the Free Software Foundation,
18 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 // CTortoiseGitBlameData.cpp : implementation of the CTortoiseGitBlameData class
23 #include "stdafx.h"
24 #include "TortoiseGitBlameData.h"
25 #include "LoglistUtils.h"
26 #include "FileTextLines.h"
27 #include "UnicodeUtils.h"
// Swaps the two low-order bytes of a UTF-16 code unit (big-endian <-> little-endian).
// The bytes are masked explicitly so the result does not depend on the width or
// signedness of wchar_t and no implicit int -> wchar_t narrowing takes place.
wchar_t WideCharSwap2(wchar_t nValue)
{
	const unsigned int codeUnit = static_cast<unsigned int>(nValue);
	const unsigned int lowByte = codeUnit & 0xFFu;
	const unsigned int highByte = (codeUnit >> 8) & 0xFFu;
	return static_cast<wchar_t>((lowByte << 8) | highByte);
}
34 // CTortoiseGitBlameData construction/destruction
36 CTortoiseGitBlameData::CTortoiseGitBlameData()
37 : m_encode(-1)
41 CTortoiseGitBlameData::~CTortoiseGitBlameData()
45 int CTortoiseGitBlameData::GetEncode(unsigned char *buff, int size, int *bomoffset)
47 CFileTextLines textlines;
48 CFileTextLines::UnicodeType type = textlines.CheckUnicodeType(buff, size);
50 if (type == CFileTextLines::UTF8BOM)
52 *bomoffset = 3;
53 return CP_UTF8;
55 if (type == CFileTextLines::UTF8)
56 return CP_UTF8;
58 if (type == CFileTextLines::UTF16_LE)
59 return 1200;
60 if (type == CFileTextLines::UTF16_LEBOM)
62 *bomoffset = 2;
63 return 1200;
66 if (type == CFileTextLines::UTF16_BE)
67 return 1201;
68 if (type == CFileTextLines::UTF16_BEBOM)
70 *bomoffset = 2;
71 return 1201;
74 return GetACP();
77 int CTortoiseGitBlameData::GetEncode(int *bomoffset)
79 int encoding = 0;
80 BYTE_VECTOR rawAll;
81 for (const auto& rawBytes : m_RawLines)
82 rawAll.append(rawBytes.data(), rawBytes.size());
83 encoding = GetEncode(rawAll.data(), (int)rawAll.size(), bomoffset);
84 return encoding;
87 void CTortoiseGitBlameData::ParseBlameOutput(BYTE_VECTOR &data, CGitHashMap & HashToRev, DWORD dateFormat, bool bRelativeTimes)
89 std::unordered_map<CGitHash, CString> hashToFilename;
91 std::vector<CGitHash> hashes;
92 std::vector<int> originalLineNumbers;
93 std::vector<CString> filenames;
94 std::vector<BYTE_VECTOR> rawLines;
95 std::vector<CString> authors;
96 std::vector<CString> dates;
98 CGitHash hash;
99 int originalLineNumber = 0;
100 int numberOfSubsequentLines = 0;
101 CString filename;
103 size_t pos = 0;
104 bool expectHash = true;
105 while (pos < data.size())
107 if (data[pos] == 0)
109 ++pos;
110 continue;
113 size_t lineBegin = pos;
114 size_t lineEnd = data.find('\n', lineBegin);
115 if (lineEnd == BYTE_VECTOR::npos)
116 lineEnd = data.size();
118 if (lineEnd > lineBegin)
120 if (data[lineBegin] != '\t')
122 if (expectHash)
124 expectHash = false;
125 if (lineEnd - lineBegin > 2 * GIT_HASH_SIZE)
127 hash.ConvertFromStrA((char*)&data[lineBegin]);
129 size_t hashEnd = lineBegin + 2 * GIT_HASH_SIZE;
130 size_t originalLineNumberBegin = hashEnd + 1;
131 size_t originalLineNumberEnd = data.find(' ', originalLineNumberBegin);
132 if (originalLineNumberEnd != BYTE_VECTOR::npos)
134 originalLineNumber = atoi(CStringA((LPCSTR)&data[originalLineNumberBegin], (int)(originalLineNumberEnd - originalLineNumberBegin)));
135 size_t finalLineNumberBegin = originalLineNumberEnd + 1;
136 size_t finalLineNumberEnd = (numberOfSubsequentLines == 0) ? data.find(' ', finalLineNumberBegin) : lineEnd;
137 if (finalLineNumberEnd != BYTE_VECTOR::npos)
139 if (numberOfSubsequentLines == 0)
141 size_t numberOfSubsequentLinesBegin = finalLineNumberEnd + 1;
142 size_t numberOfSubsequentLinesEnd = lineEnd;
143 numberOfSubsequentLines = atoi(CStringA((LPCSTR)&data[numberOfSubsequentLinesBegin], (int)(numberOfSubsequentLinesEnd - numberOfSubsequentLinesBegin)));
146 else
148 // parse error
149 numberOfSubsequentLines = 0;
152 else
154 // parse error
155 numberOfSubsequentLines = 0;
158 auto it = hashToFilename.find(hash);
159 if (it != hashToFilename.end())
160 filename = it->second;
161 else
162 filename.Empty();
164 else
166 // parse error
167 numberOfSubsequentLines = 0;
170 else
172 size_t tokenBegin = lineBegin;
173 size_t tokenEnd = data.find(' ', tokenBegin);
174 if (tokenEnd != BYTE_VECTOR::npos)
176 if (!strncmp("filename", (const char*)&data[tokenBegin], tokenEnd - tokenBegin))
178 size_t filenameBegin = tokenEnd + 1;
179 size_t filenameEnd = lineEnd;
180 CStringA filenameA = CStringA((LPCSTR)&data[filenameBegin], (int)(filenameEnd - filenameBegin));
181 filename = UnquoteFilename(filenameA);
182 auto r = hashToFilename.emplace(hash, filename);
183 if (!r.second)
185 r.first->second = filename;
191 else
193 expectHash = true;
194 // remove <TAB> at start
195 BYTE_VECTOR line;
196 if (lineEnd - 1 > lineBegin)
197 line.append(&data[lineBegin + 1], lineEnd-lineBegin - 1);
199 while (!line.empty() && line[line.size() - 1] == 13)
200 line.pop_back();
202 hashes.push_back(hash);
203 filenames.push_back(filename);
204 originalLineNumbers.push_back(originalLineNumber);
205 rawLines.push_back(line);
206 --numberOfSubsequentLines;
209 pos = lineEnd + 1;
212 for (const auto& hash2 : hashes)
214 CString err;
215 GitRev* pRev = GetRevForHash(HashToRev, hash2, &err);
216 if (pRev)
218 authors.push_back(pRev->GetAuthorName());
219 dates.push_back(CLoglistUtils::FormatDateAndTime(pRev->GetAuthorDate(), dateFormat, true, bRelativeTimes));
221 else
223 MessageBox(nullptr, err, L"TortoiseGit", MB_ICONERROR);
224 authors.emplace_back();
225 dates.emplace_back();
229 m_Hash.swap(hashes);
230 m_OriginalLineNumbers.swap(originalLineNumbers);
231 m_Filenames.swap(filenames);
232 m_RawLines.swap(rawLines);
234 m_Authors.swap(authors);
235 m_Dates.swap(dates);
236 // reset detected and applied encoding
237 m_encode = -1;
238 m_Utf8Lines.clear();
241 int CTortoiseGitBlameData::UpdateEncoding(int encode)
243 int encoding = encode;
244 int bomoffset = 0;
245 if (encoding==0)
247 BYTE_VECTOR all;
248 for (const auto& rawLine : m_RawLines)
250 if (!rawLine.empty())
251 all.append(rawLine.data(), rawLine.size());
253 encoding = GetEncode(all.data(), (int)all.size(), &bomoffset);
256 if (encoding != m_encode)
258 m_encode = encoding;
260 m_Utf8Lines.resize(m_RawLines.size());
261 for (size_t i_Lines = 0; i_Lines < m_RawLines.size(); ++i_Lines)
263 const BYTE_VECTOR& rawLine = m_RawLines[i_Lines];
265 int linebomoffset = 0;
266 CStringA lineUtf8;
267 lineUtf8.Empty();
269 if (!rawLine.empty())
271 if (encoding == 1201)
273 CString line;
274 int size = (int)((rawLine.size() - linebomoffset) / 2);
275 TCHAR *buffer = line.GetBuffer(size);
276 memcpy(buffer, &rawLine[linebomoffset], sizeof(TCHAR) * size);
277 // swap the bytes to little-endian order to get proper strings in wchar_t format
278 wchar_t * pSwapBuf = buffer;
279 for (int i = 0; i < size; ++i)
281 *pSwapBuf = WideCharSwap2(*pSwapBuf);
282 ++pSwapBuf;
284 line.ReleaseBuffer();
286 lineUtf8 = CUnicodeUtils::GetUTF8(line);
288 else if (encoding == 1200)
290 CString line;
291 // the first bomoffset is 2, after that it's 1 (see issue #920)
292 // also: don't set bomoffset if called from Encodings menu (i.e. start == 42 and bomoffset == 0); bomoffset gets only set if autodetected
293 if (linebomoffset == 0 && i_Lines != 0)
295 linebomoffset = 1;
297 int size = (int)((rawLine.size() - linebomoffset) / 2);
298 TCHAR *buffer = line.GetBuffer(size);
299 memcpy(buffer, &rawLine[linebomoffset], sizeof(TCHAR) * size);
300 line.ReleaseBuffer();
302 lineUtf8 = CUnicodeUtils::GetUTF8(line);
304 else if (encoding == CP_UTF8)
305 lineUtf8 = CStringA((LPCSTR)&rawLine[linebomoffset], (int)(rawLine.size() - linebomoffset));
306 else
308 CString line = CUnicodeUtils::GetUnicode(CStringA((LPCSTR)&rawLine[linebomoffset], (int)(rawLine.size() - linebomoffset)), encoding);
309 lineUtf8 = CUnicodeUtils::GetUTF8(line);
313 m_Utf8Lines[i_Lines] = lineUtf8;
314 linebomoffset = 0;
317 return encoding;
320 int CTortoiseGitBlameData::FindNextLine(CGitHash& CommitHash, int line, bool bUpOrDown)
322 int startline = line;
323 bool findNoMatch = false;
324 while (line >= 0 && line < (int)m_Hash.size())
326 if (m_Hash[line] != CommitHash)
327 findNoMatch = true;
329 if (m_Hash[line] == CommitHash && findNoMatch)
331 if (line == startline + 2)
332 findNoMatch = false;
333 else
335 if (bUpOrDown)
336 line = FindFirstLineInBlock(CommitHash, line);
337 return line;
340 if (bUpOrDown)
341 --line;
342 else
343 ++line;
345 return -1;
348 static int FindAsciiLower(const CStringA &str, const CStringA &find)
350 if (find.IsEmpty())
351 return 0;
353 for (int i = 0; i < str.GetLength(); ++i)
355 char c = str[i];
356 c += (c >= 'A' && c <= 'Z') ? 32 : 0;
357 if (c == find[0])
359 bool diff = false;
360 int k = 1;
361 for (int j = i + 1; j < str.GetLength() && k < find.GetLength(); ++j, ++k)
363 char d = str[j];
364 d += (d >= 'A' && d <= 'Z') ? 32 : 0;
365 if (d != find[k])
367 diff = true;
368 break;
372 if (!diff && k == find.GetLength())
373 return i;
377 return -1;
380 static int FindUtf8Lower(const CStringA& strA, bool allAscii, const CString &findW, const CStringA &findA)
382 if (allAscii)
383 return FindAsciiLower(strA, findA);
385 CString strW = CUnicodeUtils::GetUnicode(strA);
386 return strW.MakeLower().Find(findW);
389 int CTortoiseGitBlameData::FindFirstLineWrapAround(SearchDirection direction, const CString& what, int line, bool bCaseSensitive, std::function<void()> wraparound)
391 bool allAscii = true;
392 for (int i = 0; i < what.GetLength(); ++i)
394 if (what[i] > 0x7f)
396 allAscii = false;
397 break;
400 CString whatNormalized(what);
401 if (!bCaseSensitive)
402 whatNormalized.MakeLower();
404 CStringA whatNormalizedUtf8 = CUnicodeUtils::GetUTF8(whatNormalized);
406 auto numberOfLines = (int)GetNumberOfLines();
407 if (numberOfLines == 0)
408 return -1;
409 int i = line;
410 if (direction == SearchPrevious)
412 i -= 2;
413 if (i < 0)
414 i = numberOfLines - 1;
416 else if (line < 0 || line + 1 >= numberOfLines)
417 i = 0;
421 if (bCaseSensitive)
423 if (m_Authors[i].Find(whatNormalized) >= 0)
424 return i;
425 else if (m_Utf8Lines[i].Find(whatNormalizedUtf8) >=0)
426 return i;
428 else
430 if (CString(m_Authors[i]).MakeLower().Find(whatNormalized) >= 0)
431 return i;
432 else if (FindUtf8Lower(m_Utf8Lines[i], allAscii, whatNormalized, whatNormalizedUtf8) >= 0)
433 return i;
436 if (direction == SearchNext)
438 ++i;
439 if (i >= numberOfLines)
441 i = 0;
442 if (wraparound)
443 wraparound();
446 else if (direction == SearchPrevious)
448 --i;
449 if (i < 0)
451 i = numberOfLines - 2;
452 if (wraparound)
453 wraparound();
456 } while (i != line);
458 return -1;
461 bool CTortoiseGitBlameData::ContainsOnlyFilename(const CString &filename) const
463 for (const auto& name : m_Filenames)
465 if (filename != name)
466 return false;
468 return true;
471 GitRevLoglist* CTortoiseGitBlameData::GetRevForHash(CGitHashMap& HashToRev, const CGitHash& hash, CString* err)
473 auto it = HashToRev.find(hash);
474 if (it == HashToRev.end())
476 GitRevLoglist rev;
477 if (rev.GetCommitFromHash(hash))
479 *err = rev.GetLastErr();
480 return nullptr;
482 it = HashToRev.emplace(hash, rev).first;
484 return &(it->second);
487 CString CTortoiseGitBlameData::UnquoteFilename(CStringA& s)
489 if (s[0] == '"')
491 CStringA ret;
492 int i_size = s.GetLength();
493 bool isEscaped = false;
494 for (int i = 1; i < i_size; ++i)
496 char c = s[i];
497 if (isEscaped)
499 if (c >= '0' && c <= '3')
501 if (i + 2 < i_size)
503 c = (((c - '0') & 03) << 6) | (((s[i + 1] - '0') & 07) << 3) | ((s[i + 2] - '0') & 07);
504 i += 2;
505 ret += c;
508 else
510 switch (c)
512 case 'a' : c = '\a'; break;
513 case 'b' : c = '\b'; break;
514 case 't' : c = '\t'; break;
515 case 'n' : c = '\n'; break;
516 case 'v' : c = '\v'; break;
517 case 'f' : c = '\f'; break;
518 case 'r' : c = '\r'; break;
520 ret += c;
522 isEscaped = false;
524 else
526 if (c == '\\')
528 isEscaped = true;
530 else if(c == '"')
532 break;
534 else
536 ret += c;
540 return CUnicodeUtils::GetUnicode(ret);
542 else
543 return CUnicodeUtils::GetUnicode(s);