1 // TortoiseGitBlame - a Viewer for Git Blames
3 // Copyright (C) 2008-2018 - TortoiseGit
4 // Copyright (C) 2003 Don HO <donho@altern.org>
6 // This program is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU General Public License
8 // as published by the Free Software Foundation; either version 2
9 // of the License, or (at your option) any later version.
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with this program; if not, write to the Free Software Foundation,
18 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 // CTortoiseGitBlameData.cpp : implementation of the CTortoiseGitBlameData class
24 #include "TortoiseGitBlameData.h"
25 #include "LoglistUtils.h"
26 #include "FileTextLines.h"
27 #include "UnicodeUtils.h"
// Swaps the two bytes of a 16-bit code unit (UTF-16BE <-> UTF-16LE).
//
// Only the low 16 bits of nValue take part in the swap and the result is
// always limited to 16 bits. Where wchar_t is 16 bits wide (Windows) this is
// exactly the previous behaviour; where wchar_t is wider, the former
// expression leaked the shifted high byte into bits 16..23 because it relied
// on implicit truncation that does not happen there.
wchar_t WideCharSwap2(wchar_t nValue)
{
	const unsigned int codeUnit = static_cast<unsigned int>(nValue) & 0xFFFFu;
	const unsigned int swapped = ((codeUnit >> 8) | (codeUnit << 8)) & 0xFFFFu;
	return static_cast<wchar_t>(swapped);
}
34 // CTortoiseGitBlameData construction/destruction
// Default constructor.
// NOTE(review): the member initializer list / body is not visible in this excerpt.
36 CTortoiseGitBlameData::CTortoiseGitBlameData()
// Destructor.
// NOTE(review): the body is not visible in this excerpt.
41 CTortoiseGitBlameData::~CTortoiseGitBlameData()
// Detects the text encoding of a raw byte buffer.
//
// buff / size: the bytes to inspect and their count; they are handed to
//              CFileTextLines::CheckUnicodeType.
// bomoffset:   out-parameter.
//
// The detected CFileTextLines::UnicodeType is dispatched on below (UTF8BOM,
// UTF8, UTF16_LE, UTF16_LEBOM, UTF16_BE, UTF16_BEBOM).
// NOTE(review): the branch bodies and the return statements are not visible
// here; presumably each branch returns a code page identifier and the BOM
// variants store the BOM length in *bomoffset - confirm against the full file.
45 int CTortoiseGitBlameData::GetEncode(unsigned char *buff
, int size
, int *bomoffset
)
47 CFileTextLines textlines
;
48 CFileTextLines::UnicodeType type
= textlines
.CheckUnicodeType(buff
, size
);
// UTF-8 with byte order mark
50 if (type
== CFileTextLines::UTF8BOM
)
// UTF-8 without byte order mark
55 if (type
== CFileTextLines::UTF8
)
// UTF-16 little endian, without and with byte order mark
58 if (type
== CFileTextLines::UTF16_LE
)
60 if (type
== CFileTextLines::UTF16_LEBOM
)
// UTF-16 big endian, without and with byte order mark
66 if (type
== CFileTextLines::UTF16_BE
)
68 if (type
== CFileTextLines::UTF16_BEBOM
)
// Detects the encoding of the currently loaded blame content.
//
// All entries of m_RawLines are concatenated into one buffer (rawAll) which
// is then passed, together with bomoffset, to the buffer based GetEncode
// overload; the result is stored in `encoding`.
// NOTE(review): the declarations of `encoding` / `rawAll` and the return
// statement are not visible here; presumably `encoding` is returned.
77 int CTortoiseGitBlameData::GetEncode(int *bomoffset
)
// join the raw bytes of all lines so that detection sees the whole content
81 for (const auto& rawBytes
: m_RawLines
)
82 rawAll
.append(rawBytes
.data(), rawBytes
.size());
83 encoding
= GetEncode(rawAll
.data(), (int)rawAll
.size(), bomoffset
);
// Parses blame output held in `data` and fills the per-line members.
//
// data:           raw blame output, processed line by line ('\n' separated).
// HashToRev:      revision cache passed on to GetRevForHash; it may be
//                 extended by that call.
// dateFormat,
// bRelativeTimes: forwarded to CLoglistUtils::FormatDateAndTime when the
//                 author date of each line is formatted.
//
// Visible structure of the parser:
//  * lines not starting with a TAB are header lines: either a line that is
//    longer than 2 * GIT_HASH_SIZE and starts with a textual hash followed by
//    space separated numbers, or a "<token> <value>" line of which only the
//    "filename" token is handled here;
//  * lines starting with a TAB carry the content of one blamed line.
// The results are collected in local vectors and swapped into the members at
// the end.
// NOTE(review): several statements (declarations of pos/hash/filename/line,
// the else branches, the advance of `pos`) are not visible in this excerpt.
87 void CTortoiseGitBlameData::ParseBlameOutput(BYTE_VECTOR
&data
, CGitHashMap
& HashToRev
, DWORD dateFormat
, bool bRelativeTimes
)
// filename last seen for a given hash; consulted when a hash header line is parsed
89 std::unordered_map
<CGitHash
, CString
> hashToFilename
;
// per-line results, index i describes the i-th content line
91 std::vector
<CGitHash
> hashes
;
92 std::vector
<int> originalLineNumbers
;
93 std::vector
<CString
> filenames
;
94 std::vector
<BYTE_VECTOR
> rawLines
;
95 std::vector
<CString
> authors
;
96 std::vector
<CString
> dates
;
99 int originalLineNumber
= 0;
// number of content lines still expected for the current group;
// 0 means the next hash line carries a line count as its last field
100 int numberOfSubsequentLines
= 0;
104 bool expectHash
= true;
105 while (pos
< data
.size())
// extent of the current line; without a trailing '\n' it runs to the end of the data
113 size_t lineBegin
= pos
;
114 size_t lineEnd
= data
.find('\n', lineBegin
);
115 if (lineEnd
== BYTE_VECTOR::npos
)
116 lineEnd
= data
.size();
// only non-empty lines are examined
118 if (lineEnd
> lineBegin
)
// header line (content lines start with a TAB)
120 if (data
[lineBegin
] != '\t')
// long enough to hold a textual hash plus at least one more character
125 if (lineEnd
- lineBegin
> 2 * GIT_HASH_SIZE
)
127 hash
.ConvertFromStrA((char*)&data
[lineBegin
]);
// fields after the hash: original line number, final line number and
// (only for the first line of a group) the number of lines in the group
129 size_t hashEnd
= lineBegin
+ 2 * GIT_HASH_SIZE
;
130 size_t originalLineNumberBegin
= hashEnd
+ 1;
131 size_t originalLineNumberEnd
= data
.find(' ', originalLineNumberBegin
);
132 if (originalLineNumberEnd
!= BYTE_VECTOR::npos
)
134 originalLineNumber
= atoi(CStringA((LPCSTR
)&data
[originalLineNumberBegin
], (int)(originalLineNumberEnd
- originalLineNumberBegin
)));
135 size_t finalLineNumberBegin
= originalLineNumberEnd
+ 1;
// inside a group the final line number is the last field of the line
136 size_t finalLineNumberEnd
= (numberOfSubsequentLines
== 0) ? data
.find(' ', finalLineNumberBegin
) : lineEnd
;
137 if (finalLineNumberEnd
!= BYTE_VECTOR::npos
)
139 if (numberOfSubsequentLines
== 0)
141 size_t numberOfSubsequentLinesBegin
= finalLineNumberEnd
+ 1;
142 size_t numberOfSubsequentLinesEnd
= lineEnd
;
143 numberOfSubsequentLines
= atoi(CStringA((LPCSTR
)&data
[numberOfSubsequentLinesBegin
], (int)(numberOfSubsequentLinesEnd
- numberOfSubsequentLinesBegin
)));
// NOTE(review): the resets below presumably belong to the failure branches
// of the checks above (the else keywords are not visible in this excerpt)
149 numberOfSubsequentLines
= 0;
155 numberOfSubsequentLines
= 0;
// reuse the filename already recorded for this hash, if any
158 auto it
= hashToFilename
.find(hash
);
159 if (it
!= hashToFilename
.end())
160 filename
= it
->second
;
167 numberOfSubsequentLines
= 0;
// "<token> <value>" header line
172 size_t tokenBegin
= lineBegin
;
173 size_t tokenEnd
= data
.find(' ', tokenBegin
);
174 if (tokenEnd
!= BYTE_VECTOR::npos
)
// strncmp is limited to the token length found in the data
176 if (!strncmp("filename", (const char*)&data
[tokenBegin
], tokenEnd
- tokenBegin
))
178 size_t filenameBegin
= tokenEnd
+ 1;
179 size_t filenameEnd
= lineEnd
;
180 CStringA filenameA
= CStringA((LPCSTR
)&data
[filenameBegin
], (int)(filenameEnd
- filenameBegin
));
181 filename
= UnquoteFilename(filenameA
);
// remember the filename for this hash
182 auto r
= hashToFilename
.emplace(hash
, filename
);
// NOTE(review): presumably executed only when the emplace did not insert,
// i.e. to overwrite an existing entry - the guarding condition is not visible
185 r
.first
->second
= filename
;
194 // remove <TAB> at start
196 if (lineEnd
- 1 > lineBegin
)
197 line
.append(&data
[lineBegin
+ 1], lineEnd
-lineBegin
- 1);
// loop while the line ends with a CR (13); the loop body is not visible,
// presumably it drops that trailing character
199 while (!line
.empty() && line
[line
.size() - 1] == 13)
// record the content line together with the header state that applies to it
202 hashes
.push_back(hash
);
203 filenames
.push_back(filename
);
204 originalLineNumbers
.push_back(originalLineNumber
);
205 rawLines
.push_back(line
);
206 --numberOfSubsequentLines
;
// resolve author name and formatted author date for every collected line
212 for (const auto& hash2
: hashes
)
215 GitRev
* pRev
= GetRevForHash(HashToRev
, hash2
, &err
);
218 authors
.push_back(pRev
->GetAuthorName());
219 dates
.push_back(CLoglistUtils::FormatDateAndTime(pRev
->GetAuthorDate(), dateFormat
, true, bRelativeTimes
));
// NOTE(review): presumably the branch taken when no revision could be
// obtained: the error is shown and empty entries keep the vectors aligned
223 MessageBox(nullptr, err
, L
"TortoiseGit", MB_ICONERROR
);
224 authors
.emplace_back();
225 dates
.emplace_back();
// publish the collected results by swapping them into the members
230 m_OriginalLineNumbers
.swap(originalLineNumbers
);
231 m_Filenames
.swap(filenames
);
232 m_RawLines
.swap(rawLines
);
234 m_Authors
.swap(authors
);
236 // reset detected and applied encoding
// Re-decodes m_RawLines into m_Utf8Lines for the requested encoding.
//
// encode: requested encoding (used as a code page in the fallback branch
//         below).
//
// Visible logic: the non-empty raw lines are concatenated and passed to
// GetEncode, which yields `encoding` and `bomoffset`. When `encoding` differs
// from m_encode, every raw line is converted to UTF-8:
//   1201    - the bytes are copied as 16-bit units and each unit is byte
//             swapped to little-endian before conversion,
//   1200    - the bytes are copied as 16-bit units unchanged,
//   CP_UTF8 - the bytes are taken over as they are,
//   other   - converted via CUnicodeUtils::GetUnicode with `encoding`.
// NOTE(review): the condition that triggers the auto detection, the handling
// of `bomoffset` / `linebomoffset`, the update of m_encode and the return
// value are not visible in this excerpt.
241 int CTortoiseGitBlameData::UpdateEncoding(int encode
)
243 int encoding
= encode
;
// build one buffer from all non-empty lines for the detection
248 for (const auto& rawLine
: m_RawLines
)
250 if (!rawLine
.empty())
251 all
.append(rawLine
.data(), rawLine
.size());
253 encoding
= GetEncode(all
.data(), (int)all
.size(), &bomoffset
);
// only convert again when the encoding actually changed
256 if (encoding
!= m_encode
)
260 m_Utf8Lines
.resize(m_RawLines
.size());
261 for (size_t i_Lines
= 0; i_Lines
< m_RawLines
.size(); ++i_Lines
)
263 const BYTE_VECTOR
& rawLine
= m_RawLines
[i_Lines
];
// number of leading bytes of this line that are skipped before decoding
265 int linebomoffset
= 0;
269 if (!rawLine
.empty())
// 1201: 16-bit units stored big-endian
271 if (encoding
== 1201)
// number of 16-bit units after the skipped prefix
274 int size
= (int)((rawLine
.size() - linebomoffset
) / 2);
275 TCHAR
*buffer
= line
.GetBuffer(size
);
276 memcpy(buffer
, &rawLine
[linebomoffset
], sizeof(TCHAR
) * size
);
277 // swap the bytes to little-endian order to get proper strings in wchar_t format
278 wchar_t * pSwapBuf
= buffer
;
279 for (int i
= 0; i
< size
; ++i
)
281 *pSwapBuf
= WideCharSwap2(*pSwapBuf
);
284 line
.ReleaseBuffer();
286 lineUtf8
= CUnicodeUtils::GetUTF8(line
);
// 1200: 16-bit units that are copied without byte swapping
288 else if (encoding
== 1200)
291 // the first bomoffset is 2, after that it's 1 (see issue #920)
292 // also: don't set bomoffset if called from Encodings menu (i.e. start == 42 and bomoffset == 0); bomoffset gets only set if autodetected
293 if (linebomoffset
== 0 && i_Lines
!= 0)
297 int size
= (int)((rawLine
.size() - linebomoffset
) / 2);
298 TCHAR
*buffer
= line
.GetBuffer(size
);
299 memcpy(buffer
, &rawLine
[linebomoffset
], sizeof(TCHAR
) * size
);
300 line
.ReleaseBuffer();
302 lineUtf8
= CUnicodeUtils::GetUTF8(line
);
// already UTF-8: take the bytes over without conversion
304 else if (encoding
== CP_UTF8
)
305 lineUtf8
= CStringA((LPCSTR
)&rawLine
[linebomoffset
], (int)(rawLine
.size() - linebomoffset
));
// any other encoding: decode with `encoding` as code page, then encode as UTF-8
308 CString line
= CUnicodeUtils::GetUnicode(CStringA((LPCSTR
)&rawLine
[linebomoffset
], (int)(rawLine
.size() - linebomoffset
)), encoding
);
309 lineUtf8
= CUnicodeUtils::GetUTF8(line
);
313 m_Utf8Lines
[i_Lines
] = lineUtf8
;
// Searches m_Hash, starting at `line`, for a line that belongs to CommitHash.
//
// The loop runs while `line` stays inside [0, m_Hash.size()). It first has to
// see a line with a different hash (tracked in findNoMatch) before a line with
// CommitHash counts as a hit; a hit is passed through FindFirstLineInBlock.
// NOTE(review): how `line` is advanced, the role of bUpOrDown (presumably the
// search direction), the meaning of the `startline + 2` special case and the
// return values are not visible in this excerpt.
320 int CTortoiseGitBlameData::FindNextLine(CGitHash
& CommitHash
, int line
, bool bUpOrDown
)
322 int startline
= line
;
323 bool findNoMatch
= false;
324 while (line
>= 0 && line
< (int)m_Hash
.size())
326 if (m_Hash
[line
] != CommitHash
)
// same commit again after at least one foreign line was seen
329 if (m_Hash
[line
] == CommitHash
&& findNoMatch
)
331 if (line
== startline
+ 2)
336 line
= FindFirstLineInBlock(CommitHash
, line
);
// Searches `find` in `str`, folding the ASCII letters 'A'-'Z' of `str` to
// lower case (by adding 32) before comparing.
//
// The characters of `find` are not folded in the visible code, so `find` is
// presumably expected to be lower case already - verify against the caller.
// NOTE(review): the comparison statements and the return statements are not
// visible in this excerpt; FindUtf8Lower's caller tests its result with
// ">= 0", so a match position or a negative value is presumably returned.
348 static int FindAsciiLower(const CStringA
&str
, const CStringA
&find
)
// try every start position in str
353 for (int i
= 0; i
< str
.GetLength(); ++i
)
356 c
+= (c
>= 'A' && c
<= 'Z') ? 32 : 0;
// walk str (index j) and find (index k) in parallel
361 for (int j
= i
+ 1; j
< str
.GetLength() && k
< find
.GetLength(); ++j
, ++k
)
364 d
+= (d
>= 'A' && d
<= 'Z') ? 32 : 0;
// a match requires that no difference was seen and that all of `find` was consumed
372 if (!diff
&& k
== find
.GetLength())
// Case-insensitive search in a UTF-8 encoded line.
//
// strA:     UTF-8 text to search in.
// allAscii: NOTE(review): presumably selects the FindAsciiLower path below;
//           the guarding condition is not visible in this excerpt.
// findW:    search text as wide string, used for the Unicode path.
// findA:    search text as narrow string, used for the ASCII path.
//
// Returns the result of FindAsciiLower(strA, findA) on the one path, or the
// result of CString::Find on the lower-cased wide conversion of strA on the
// other path.
380 static int FindUtf8Lower(const CStringA
& strA
, bool allAscii
, const CString
&findW
, const CStringA
&findA
)
383 return FindAsciiLower(strA
, findA
);
// convert to a wide string and lower-case it before searching
385 CString strW
= CUnicodeUtils::GetUnicode(strA
);
386 return strW
.MakeLower().Find(findW
);
// Searches the author names (m_Authors) and the UTF-8 line texts
// (m_Utf8Lines) for `what`, starting relative to `line` and moving in
// `direction` (SearchNext / SearchPrevious).
//
// what:           text to search for; a lower-cased copy and its UTF-8 form
//                 are prepared up front.
// bCaseSensitive: NOTE(review): presumably selects between the exact Find()
//                 comparisons and the lower-casing comparisons below; the
//                 guarding conditions are not visible in this excerpt.
// wraparound:     callback; NOTE(review): its call site is not visible,
//                 presumably it is invoked when the search wraps.
//
// NOTE(review): the loop skeleton, the start index computation, the wrap
// handling details and the return values are not visible in this excerpt.
389 int CTortoiseGitBlameData::FindFirstLineWrapAround(SearchDirection direction
, const CString
& what
, int line
, bool bCaseSensitive
, std::function
<void()> wraparound
)
// scan `what` character by character; allAscii is later handed to FindUtf8Lower
391 bool allAscii
= true;
392 for (int i
= 0; i
< what
.GetLength(); ++i
)
400 CString
whatNormalized(what
);
402 whatNormalized
.MakeLower();
404 CStringA whatNormalizedUtf8
= CUnicodeUtils::GetUTF8(whatNormalized
);
406 auto numberOfLines
= (int)GetNumberOfLines();
407 if (numberOfLines
== 0)
410 if (direction
== SearchPrevious
)
414 i
= numberOfLines
- 1;
416 else if (line
< 0 || line
+ 1 >= numberOfLines
)
// comparison against the author and the line text without lower-casing them
423 if (m_Authors
[i
].Find(whatNormalized
) >= 0)
425 else if (m_Utf8Lines
[i
].Find(whatNormalizedUtf8
) >=0)
// comparison with author and line text folded to lower case
430 if (CString(m_Authors
[i
]).MakeLower().Find(whatNormalized
) >= 0)
432 else if (FindUtf8Lower(m_Utf8Lines
[i
], allAscii
, whatNormalized
, whatNormalizedUtf8
) >= 0)
436 if (direction
== SearchNext
)
439 if (i
>= numberOfLines
)
446 else if (direction
== SearchPrevious
)
451 i
= numberOfLines
- 2;
// Compares `filename` against every entry of m_Filenames.
// NOTE(review): the return statements are not visible in this excerpt;
// judging by the name, false is presumably returned on the first mismatch
// and true otherwise.
461 bool CTortoiseGitBlameData::ContainsOnlyFilename(const CString
&filename
) const
463 for (const auto& name
: m_Filenames
)
465 if (filename
!= name
)
// Returns the revision object stored in HashToRev for `hash`, loading it on
// demand.
//
// HashToRev: cache that maps hashes to revisions; a newly loaded revision is
//            inserted into it.
// hash:      commit to look up.
// err:       receives rev.GetLastErr() when GetCommitFromHash reports a
//            failure (non-zero result).
//
// Returns a pointer to the revision stored inside HashToRev.
// NOTE(review): the declaration of `rev`, a possible null check of `err` and
// the value returned on failure are not visible in this excerpt (presumably
// nullptr).
471 GitRevLoglist
* CTortoiseGitBlameData::GetRevForHash(CGitHashMap
& HashToRev
, const CGitHash
& hash
, CString
* err
)
473 auto it
= HashToRev
.find(hash
);
// cache miss: load the commit and add it to the cache
474 if (it
== HashToRev
.end())
477 if (rev
.GetCommitFromHash(hash
))
479 *err
= rev
.GetLastErr();
482 it
= HashToRev
.emplace(hash
, rev
).first
;
484 return &(it
->second
);
// Converts a filename taken from the blame output into a CString, decoding
// escape sequences on the way.
//
// Visible decoding rules: a three digit octal escape whose first digit is
// '0'-'3' is combined into one byte, and the letters a, b, t, n, v, f, r are
// mapped to the corresponding C control characters.
// The decoded bytes (`ret`) or, on the other path, the unmodified input `s`
// are converted with CUnicodeUtils::GetUnicode.
// NOTE(review): the test that selects between the two return paths
// (presumably whether `s` is enclosed in quotes - the loop starts at index 1),
// the handling of the backslash / isEscaped flag and any bounds check for
// s[i + 1] / s[i + 2] are not visible in this excerpt.
487 CString
CTortoiseGitBlameData::UnquoteFilename(CStringA
& s
)
492 int i_size
= s
.GetLength();
493 bool isEscaped
= false;
494 for (int i
= 1; i
< i_size
; ++i
)
// octal escape: 2 + 3 + 3 bits taken from three consecutive digits
499 if (c
>= '0' && c
<= '3')
503 c
= (((c
- '0') & 03) << 6) | (((s
[i
+ 1] - '0') & 07) << 3) | ((s
[i
+ 2] - '0') & 07);
// single letter escapes
512 case 'a' : c
= '\a'; break;
513 case 'b' : c
= '\b'; break;
514 case 't' : c
= '\t'; break;
515 case 'n' : c
= '\n'; break;
516 case 'v' : c
= '\v'; break;
517 case 'f' : c
= '\f'; break;
518 case 'r' : c
= '\r'; break;
540 return CUnicodeUtils::GetUnicode(ret
);
543 return CUnicodeUtils::GetUnicode(s
);