1 // TortoiseGitBlame - a Viewer for Git Blames
3 // Copyright (C) 2008-2016 - TortoiseGit
4 // Copyright (C) 2003 Don HO <donho@altern.org>
6 // This program is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU General Public License
8 // as published by the Free Software Foundation; either version 2
9 // of the License, or (at your option) any later version.
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with this program; if not, write to the Free Software Foundation,
18 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 // CTortoiseGitBlameData.cpp : implementation of the CTortoiseGitBlameData class
24 #include "TortoiseGitBlameData.h"
25 #include "LoglistUtils.h"
26 #include "FileTextLines.h"
27 #include "UnicodeUtils.h"
// Exchanges the two bytes of a wide character: the value shifted right by 8
// bits is OR-ed with the value shifted left by 8 bits.
// NOTE(review): this is only a clean byte swap when wchar_t is 16 bits wide
// (bits shifted above the type width are dropped on return) - confirm for the
// target platform.
29 wchar_t WideCharSwap2(wchar_t nValue
)
31 return (((nValue
>> 8)) | (nValue
<< 8));
34 // CTortoiseGitBlameData construction/destruction
// Default constructor.
// NOTE(review): the initializer list / body is not visible in this view.
36 CTortoiseGitBlameData::CTortoiseGitBlameData()
// Destructor.
// NOTE(review): the body is not visible in this view.
41 CTortoiseGitBlameData::~CTortoiseGitBlameData()
// Detects the text encoding of a raw byte buffer.
// The buffer is classified with CFileTextLines::CheckUnicodeType and the
// result is matched against UTF8BOM, UTF8, UTF16_LE, UTF16_LEBOM, UTF16_BE
// and UTF16_BEBOM.
//   buff      - raw bytes to inspect
//   size      - number of bytes handed to CheckUnicodeType
//   bomoffset - out-pointer; NOTE(review): presumably receives the BOM length
//               for the *BOM types - the assignments are not visible here, confirm
// Returns an int encoding id. UpdateEncoding() compares this value against
// 1200, 1201 and CP_UTF8, so it is presumably a Windows code page number -
// the return statements are not visible here, confirm.
45 int CTortoiseGitBlameData::GetEncode(unsigned char *buff
, int size
, int *bomoffset
)
47 CFileTextLines textlines
;
48 CFileTextLines::UnicodeType type
= textlines
.CheckUnicodeType(buff
, size
);
// one branch per detected type; the branch bodies are not visible in this view
50 if (type
== CFileTextLines::UTF8BOM
)
55 if (type
== CFileTextLines::UTF8
)
58 if (type
== CFileTextLines::UTF16_LE
)
60 if (type
== CFileTextLines::UTF16_LEBOM
)
66 if (type
== CFileTextLines::UTF16_BE
)
68 if (type
== CFileTextLines::UTF16_BEBOM
)
// Detects the encoding of the complete blamed content.
// All entries of m_RawLines are concatenated into one buffer (rawAll), which
// is then passed to the buffer-based GetEncode overload.
//   bomoffset - forwarded unchanged to the buffer-based overload
// NOTE(review): the return statement is not visible here; presumably the
// detected `encoding` is returned - confirm.
77 int CTortoiseGitBlameData::GetEncode(int *bomoffset
)
// NOTE(review): &rawBytes[0] and &rawAll[0] are taken without a visible
// emptiness check (UpdateEncoding guards the same append with
// !rawLine.empty()) - confirm empty lines / empty content cannot reach here.
81 for (const auto& rawBytes
: m_RawLines
)
82 rawAll
.append(&rawBytes
[0], rawBytes
.size());
83 encoding
= GetEncode(&rawAll
[0], (int)rawAll
.size(), bomoffset
);
// Parses blame output held in `data` and stores the per-line results in this object.
//
// The buffer is processed one '\n'-terminated line at a time:
//  * lines that do not start with a <TAB> are header lines. A header longer
//    than 40 characters is read as "<40 char hash> <original line> <final line>
//    [<number of lines>]"; other headers are split at the first space and the
//    "filename" token is unquoted via UnquoteFilename and remembered per hash.
//  * lines that start with a <TAB> carry file content: the <TAB> is dropped,
//    trailing carriage returns are removed and hash, filename, original line
//    number and raw bytes are appended to the local result vectors.
// Afterwards author name and formatted author date are resolved for every
// collected hash through GetRevForHash, and the local vectors are swapped
// into the member vectors.
//
//   data           - raw blame output
//   HashToRev      - revision cache passed on to GetRevForHash
//   dateFormat     - passed to CLoglistUtils::FormatDateAndTime
//   bRelativeTimes - passed to CLoglistUtils::FormatDateAndTime
//
// NOTE(review): several declarations (pos, hash, filename, line, err) and the
// braces / else branches are not visible in this view; the exact role of
// expectHash and the three "numberOfSubsequentLines = 0" resets cannot be
// confirmed from here.
87 void CTortoiseGitBlameData::ParseBlameOutput(BYTE_VECTOR
&data
, CGitHashMap
& HashToRev
, DWORD dateFormat
, bool bRelativeTimes
)
// filename remembered for each commit hash (filled from "filename" header lines)
89 std::unordered_map
<CGitHash
, CString
> hashToFilename
;
// per-line results; built locally and swapped into the members at the end
91 std::vector
<CGitHash
> hashes
;
92 std::vector
<int> originalLineNumbers
;
93 std::vector
<CString
> filenames
;
94 std::vector
<BYTE_VECTOR
> rawLines
;
95 std::vector
<CString
> authors
;
96 std::vector
<CString
> dates
;
99 int originalLineNumber
= 0;
100 int finalLineNumber
= 0;
101 int numberOfSubsequentLines
= 0;
105 bool expectHash
= true;
106 while (pos
< data
.size())
// find the end of the current line; a last line without '\n' ends at data.size()
114 size_t lineBegin
= pos
;
115 size_t lineEnd
= data
.find('\n', lineBegin
);
116 if (lineEnd
== BYTE_VECTOR::npos
)
117 lineEnd
= data
.size();
119 if (lineEnd
> lineBegin
)
// anything not starting with <TAB> is a header line
121 if (data
[lineBegin
] != '\t')
// header with a hash: the first 40 characters are the hash, followed by space separated numbers
126 if (lineEnd
- lineBegin
> 40)
128 hash
.ConvertFromStrA((char*)&data
[lineBegin
]);
130 size_t hashEnd
= lineBegin
+ 40;
131 size_t originalLineNumberBegin
= hashEnd
+ 1;
132 size_t originalLineNumberEnd
= data
.find(' ', originalLineNumberBegin
);
133 if (originalLineNumberEnd
!= BYTE_VECTOR::npos
)
135 originalLineNumber
= atoi(CStringA((LPCSTR
)&data
[originalLineNumberBegin
], (int)(originalLineNumberEnd
- originalLineNumberBegin
)));
136 size_t finalLineNumberBegin
= originalLineNumberEnd
+ 1;
// while no group is in progress a third field follows, so the final line number
// ends at the next space; otherwise it runs to the end of the line
137 size_t finalLineNumberEnd
= (numberOfSubsequentLines
== 0) ? data
.find(' ', finalLineNumberBegin
) : lineEnd
;
138 if (finalLineNumberEnd
!= BYTE_VECTOR::npos
)
140 finalLineNumber
= atoi(CStringA((LPCSTR
)&data
[finalLineNumberBegin
], (int)(finalLineNumberEnd
- finalLineNumberBegin
)));
141 if (numberOfSubsequentLines
== 0)
// third field: number of lines in the group that starts here
143 size_t numberOfSubsequentLinesBegin
= finalLineNumberEnd
+ 1;
144 size_t numberOfSubsequentLinesEnd
= lineEnd
;
145 numberOfSubsequentLines
= atoi(CStringA((LPCSTR
)&data
[numberOfSubsequentLinesBegin
], (int)(numberOfSubsequentLinesEnd
- numberOfSubsequentLinesBegin
)));
152 numberOfSubsequentLines
= 0;
159 numberOfSubsequentLines
= 0;
// reuse the filename already recorded for this hash, if any
162 auto it
= hashToFilename
.find(hash
);
163 if (it
!= hashToFilename
.end())
164 filename
= it
->second
;
172 numberOfSubsequentLines
= 0;
// other header lines have the form "<token> <value>"
177 size_t tokenBegin
= lineBegin
;
178 size_t tokenEnd
= data
.find(' ', tokenBegin
);
179 if (tokenEnd
!= BYTE_VECTOR::npos
)
// NOTE(review): the comparison length is the token length, so any prefix of
// "filename" (including an empty token) also compares equal - confirm this is acceptable.
181 if (!strncmp("filename", (const char*)&data
[tokenBegin
], tokenEnd
- tokenBegin
))
183 size_t filenameBegin
= tokenEnd
+ 1;
184 size_t filenameEnd
= lineEnd
;
185 CStringA filenameA
= CStringA((LPCSTR
)&data
[filenameBegin
], (int)(filenameEnd
- filenameBegin
));
186 filename
= UnquoteFilename(filenameA
);
// record the filename for this hash; the stored value is overwritten below
// NOTE(review): the condition guarding the overwrite is not visible here
187 auto r
= hashToFilename
.emplace(hash
, filename
);
190 r
.first
->second
= filename
;
199 // remove <TAB> at start
201 if (lineEnd
- 1 > lineBegin
)
202 line
.append(&data
[lineBegin
+ 1], lineEnd
-lineBegin
- 1);
// strip trailing carriage returns (13 is the code of '\r'); loop body not visible here
204 while (!line
.empty() && line
[line
.size() - 1] == 13)
// store the content line together with the data of the most recent header
207 hashes
.push_back(hash
);
208 filenames
.push_back(filename
);
209 originalLineNumbers
.push_back(originalLineNumber
);
210 rawLines
.push_back(line
);
211 --numberOfSubsequentLines
;
// resolve author and date for every collected line
217 for (const auto& hash2
: hashes
)
220 GitRev
* pRev
= GetRevForHash(HashToRev
, hash2
, &err
);
223 authors
.push_back(pRev
->GetAuthorName());
224 dates
.push_back(CLoglistUtils::FormatDateAndTime(pRev
->GetAuthorDate(), dateFormat
, true, bRelativeTimes
));
// error path: report `err` in a message box and append empty author/date entries
// NOTE(review): the condition selecting this path is not visible here
228 MessageBox(nullptr, err
, L
"TortoiseGit", MB_ICONERROR
);
229 authors
.emplace_back();
230 dates
.emplace_back();
// hand the parsed data over to the members
235 m_OriginalLineNumbers
.swap(originalLineNumbers
);
236 m_Filenames
.swap(filenames
);
237 m_RawLines
.swap(rawLines
);
239 m_Authors
.swap(authors
);
241 // reset detected and applied encoding
// Rebuilds m_Utf8Lines from m_RawLines for the given encoding.
//
//   encode - requested encoding; copied into the local `encoding`, which may
//            be replaced by the result of GetEncode() over the concatenation
//            of all non-empty raw lines.
//            NOTE(review): the condition that triggers this auto-detection is
//            not visible in this view.
//
// When `encoding` differs from m_encode every raw line is converted to UTF-8:
//   1201     - bytes are copied into a wide string and every character is
//              byte-swapped with WideCharSwap2 before conversion
//   1200     - bytes are copied into a wide string as they are
//   CP_UTF8  - bytes are taken over unchanged
//   other    - bytes are converted with CUnicodeUtils::GetUnicode(..., encoding)
// NOTE(review): 1200 / 1201 are presumably the UTF-16 LE / BE code page ids - confirm.
// NOTE(review): the return statement is not visible here.
246 int CTortoiseGitBlameData::UpdateEncoding(int encode
)
248 int encoding
= encode
;
// gather all non-empty raw lines into one buffer for encoding detection
253 for (const auto& rawLine
: m_RawLines
)
255 if (!rawLine
.empty())
256 all
.append(&rawLine
[0], rawLine
.size());
// NOTE(review): &all[0] requires a non-empty buffer; a guard is not visible here - confirm
258 encoding
= GetEncode(&all
[0], (int)all
.size(), &bomoffset
);
// only reconvert when the encoding actually changed
261 if (encoding
!= m_encode
)
265 m_Utf8Lines
.resize(m_RawLines
.size());
266 for (size_t i_Lines
= 0; i_Lines
< m_RawLines
.size(); ++i_Lines
)
268 const BYTE_VECTOR
& rawLine
= m_RawLines
[i_Lines
];
// number of leading bytes skipped in this line
// NOTE(review): where it is set from the detected bomoffset is not visible here
270 int linebomoffset
= 0;
274 if (!rawLine
.empty())
276 if (encoding
== 1201)
// size is counted in 2-byte units after the skipped bytes
279 int size
= (int)((rawLine
.size() - linebomoffset
) / 2);
280 TCHAR
*buffer
= line
.GetBuffer(size
);
281 memcpy(buffer
, &rawLine
[linebomoffset
], sizeof(TCHAR
) * size
);
282 // swap the bytes to little-endian order to get proper strings in wchar_t format
283 wchar_t * pSwapBuf
= buffer
;
284 for (int i
= 0; i
< size
; ++i
)
286 *pSwapBuf
= WideCharSwap2(*pSwapBuf
);
289 line
.ReleaseBuffer();
291 lineUtf8
= CUnicodeUtils::GetUTF8(line
);
293 else if (encoding
== 1200)
296 // the first bomoffset is 2, after that it's 1 (see issue #920)
297 // also: don't set bomoffset if called from Encodings menu (i.e. start == 42 and bomoffset == 0); bomoffset gets only set if autodetected
298 if (linebomoffset
== 0 && i_Lines
!= 0)
// size is counted in 2-byte units after the skipped bytes
302 int size
= (int)((rawLine
.size() - linebomoffset
) / 2);
303 TCHAR
*buffer
= line
.GetBuffer(size
);
304 memcpy(buffer
, &rawLine
[linebomoffset
], sizeof(TCHAR
) * size
);
305 line
.ReleaseBuffer();
307 lineUtf8
= CUnicodeUtils::GetUTF8(line
);
309 else if (encoding
== CP_UTF8
)
310 lineUtf8
= CStringA((LPCSTR
)&rawLine
[linebomoffset
], (int)(rawLine
.size() - linebomoffset
));
// any other encoding: convert to a wide string using `encoding`, then to UTF-8
313 CString line
= CUnicodeUtils::GetUnicode(CStringA((LPCSTR
)&rawLine
[linebomoffset
], (int)(rawLine
.size() - linebomoffset
)), encoding
);
314 lineUtf8
= CUnicodeUtils::GetUTF8(line
);
318 m_Utf8Lines
[i_Lines
] = lineUtf8
;
// Walks through m_Hash starting at `line`, comparing each entry with CommitHash,
// while `line` stays inside [0, m_Hash.size()).
//   CommitHash - hash the lines are compared against
//   line       - start index; also used as the running index
//   bUpOrDown  - NOTE(review): presumably selects the walking direction; its
//                use is not visible in this view - confirm
// NOTE(review): how findNoMatch is updated, how `line` is advanced and what is
// returned is not visible here. The visible code suggests that a matching line
// found after at least one non-matching line ends the search and that the
// result is moved to the first line of its block via FindFirstLineInBlock - confirm.
325 int CTortoiseGitBlameData::FindNextLine(CGitHash
& CommitHash
, int line
, bool bUpOrDown
)
327 int startline
= line
;
328 bool findNoMatch
= false;
329 while (line
>= 0 && line
< (int)m_Hash
.size())
331 if (m_Hash
[line
] != CommitHash
)
334 if (m_Hash
[line
] == CommitHash
&& findNoMatch
)
336 if (line
== startline
+ 2)
341 line
= FindFirstLineInBlock(CommitHash
, line
);
// Substring search that folds ASCII upper-case letters to lower case: 32 is
// added to characters in 'A'..'Z' before they are compared.
//   str  - text to search in
//   find - text to search for
// NOTE(review): the declarations of c, d, k and diff, the comparisons against
// `find` and the return statements are not visible in this view. Presumably only
// characters of `str` are folded, so `find` is expected to be lower case
// already, and the index of the first match (or a negative value) is returned
// - confirm.
353 static int FindAsciiLower(const CStringA
&str
, const CStringA
&find
)
// outer loop: candidate start positions in str
358 for (int i
= 0; i
< str
.GetLength(); ++i
)
361 c
+= (c
>= 'A' && c
<= 'Z') ? 32 : 0;
// inner loop: walks str from i + 1 (index j) together with the index k into find
366 for (int j
= i
+ 1; j
< str
.GetLength() && k
< find
.GetLength(); ++j
, ++k
)
369 d
+= (d
>= 'A' && d
<= 'Z') ? 32 : 0;
// match only if nothing differed and all of `find` was consumed
377 if (!diff
&& k
== find
.GetLength())
// Case-insensitive search in a UTF-8 encoded line, with two paths:
//  - FindAsciiLower(strA, findA), working directly on the byte strings
//  - converting strA to a wide string, lower-casing it and searching for findW
//   strA     - UTF-8 text to search in
//   allAscii - NOTE(review): presumably selects the FindAsciiLower path when
//              true; the condition itself is not visible in this view - confirm
//   findW    - search text as wide string; presumably already lower case, since
//              it is not folded here
//   findA    - search text as byte string for the FindAsciiLower path
// Returns the result of FindAsciiLower or of CString::Find.
385 static int FindUtf8Lower(const CStringA
& strA
, bool allAscii
, const CString
&findW
, const CStringA
&findA
)
388 return FindAsciiLower(strA
, findA
);
390 CString strW
= CUnicodeUtils::GetUnicode(strA
);
391 return strW
.MakeLower().Find(findW
);
// Searches the author names (m_Authors) and the UTF-8 line contents
// (m_Utf8Lines) for `what`.
//   direction      - SearchNext or SearchPrevious
//   what           - text to search for
//   line           - NOTE(review): presumably the line the search starts from;
//                    only its range check is visible here - confirm
//   bCaseSensitive - NOTE(review): presumably selects between the plain Find
//                    comparisons and the lower-casing comparisons below; the
//                    condition is not visible in this view - confirm
// NOTE(review): the loop around the comparisons, the declaration of `i` and the
// return statements are not visible here. Judging by the name the search wraps
// around at the ends of the file - confirm.
394 int CTortoiseGitBlameData::FindFirstLineWrapAround(SearchDirection direction
, const CString
& what
, int line
, bool bCaseSensitive
)
// allAscii is later passed to FindUtf8Lower; the loop body that updates it is not visible here
396 bool allAscii
= true;
397 for (int i
= 0; i
< what
.GetLength(); ++i
)
// search text as wide string and as UTF-8 bytes
405 CString
whatNormalized(what
);
407 whatNormalized
.MakeLower();
409 CStringA whatNormalizedUtf8
= CUnicodeUtils::GetUTF8(whatNormalized
);
411 int numberOfLines
= GetNumberOfLines();
// choose the first line to inspect
413 if (direction
== SearchPrevious
)
417 i
= numberOfLines
- 1;
419 else if (line
< 0 || line
+ 1 >= numberOfLines
)
// comparison without case folding of the searched text
426 if (m_Authors
[i
].Find(whatNormalized
) >= 0)
428 else if (m_Utf8Lines
[i
].Find(whatNormalizedUtf8
) >=0)
// comparison with lower-cased author / line content
433 if (CString(m_Authors
[i
]).MakeLower().Find(whatNormalized
) >= 0)
435 else if (FindUtf8Lower(m_Utf8Lines
[i
], allAscii
, whatNormalized
, whatNormalizedUtf8
) >= 0)
// advance to the next candidate line
439 if (direction
== SearchNext
)
442 if (i
>= numberOfLines
)
445 else if (direction
== SearchPrevious
)
// NOTE(review): this assigns numberOfLines - 2 while the SearchPrevious setup
// above starts at numberOfLines - 1; the surrounding condition is not visible - confirm intentional
449 i
= numberOfLines
- 2;
// Compares every entry of m_Filenames with `filename`.
//   filename - name the stored filenames are compared against
// NOTE(review): the return statements are not visible in this view; presumably
// false is returned on the first differing entry and true otherwise - confirm.
456 bool CTortoiseGitBlameData::ContainsOnlyFilename(const CString
&filename
) const
458 for (const auto& name
: m_Filenames
)
460 if (filename
!= name
)
// Returns the revision stored for `hash` in HashToRev, loading it on a miss.
// If the hash is not in the map, the commit is loaded with
// rev.GetCommitFromHash(hash) and the revision is inserted into the map.
//   HashToRev - revision cache; may get a new entry
//   hash      - commit hash to look up
//   err       - receives rev.GetLastErr() when GetCommitFromHash reports a
//               failure (non-zero result)
//               NOTE(review): a null check on err and what is returned on
//               failure are not visible in this view - ParseBlameOutput has a
//               path that shows `err`, so presumably nullptr is returned - confirm
// Returns a pointer to the revision stored in the map.
466 GitRevLoglist
* CTortoiseGitBlameData::GetRevForHash(CGitHashMap
& HashToRev
, const CGitHash
& hash
, CString
* err
)
468 auto it
= HashToRev
.find(hash
);
// cache miss: load the commit
469 if (it
== HashToRev
.end())
472 if (rev
.GetCommitFromHash(hash
))
474 *err
= rev
.GetLastErr();
477 it
= HashToRev
.emplace(hash
, rev
).first
;
479 return &(it
->second
);
// Decodes escape sequences in a filename and returns it as a wide string.
// The visible handling covers three-digit octal escapes (first digit '0'..'3')
// and the escapes a, b, t, n, v, f and r; the result is converted with
// CUnicodeUtils::GetUnicode.
//   s - filename as byte string
// NOTE(review): the condition separating the two return paths is not visible in
// this view; presumably the decoding path is taken only for quoted names and `s`
// is converted unchanged otherwise - confirm.
482 CString
CTortoiseGitBlameData::UnquoteFilename(CStringA
& s
)
487 int i_size
= s
.GetLength();
488 bool isEscaped
= false;
// starts at index 1 - presumably to skip an opening quote character, confirm
489 for (int i
= 1; i
< i_size
; ++i
)
// octal escape: 2 bits from the first digit, 3 bits from each of the next two digits
// NOTE(review): s[i + 1] and s[i + 2] are read here; a bounds check is not visible - confirm
494 if (c
>= '0' && c
<= '3')
498 c
= (((c
- '0') & 03) << 6) | (((s
[i
+ 1] - '0') & 07) << 3) | ((s
[i
+ 2] - '0') & 07);
// single-character escapes mapped to their control characters
507 case 'a' : c
= '\a'; break;
508 case 'b' : c
= '\b'; break;
509 case 't' : c
= '\t'; break;
510 case 'n' : c
= '\n'; break;
511 case 'v' : c
= '\v'; break;
512 case 'f' : c
= '\f'; break;
513 case 'r' : c
= '\r'; break;
535 return CUnicodeUtils::GetUnicode(ret
);
538 return CUnicodeUtils::GetUnicode(s
);