src/TortoiseGitBlame/TortoiseGitBlameData.cpp

   1 // TortoiseGitBlame - a Viewer for Git Blames
   2
   3 // Copyright (C) 2008-2018 - TortoiseGit
   4 // Copyright (C) 2003 Don HO <donho@altern.org>
   5
   6 // This program is free software; you can redistribute it and/or
   7 // modify it under the terms of the GNU General Public License
   8 // as published by the Free Software Foundation; either version 2
   9 // of the License, or (at your option) any later version.
  10
  11 // This program is distributed in the hope that it will be useful,
  12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 // GNU General Public License for more details.
  15
  16 // You should have received a copy of the GNU General Public License
  17 // along with this program; if not, write to the Free Software Foundation,
  18 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  19
  20 // CTortoiseGitBlameData.cpp : implementation of the CTortoiseGitBlameData class
  21 //
  22
  23 #include "stdafx.h"
  24 #include "TortoiseGitBlameData.h"
  25 #include "LoglistUtils.h"
  26 #include "FileTextLines.h"
  27 #include "UnicodeUtils.h"
  28
  29 wchar_t WideCharSwap2(wchar_t nValue)
  30 {
  31         return (((nValue>> 8)) | (nValue << 8));
  32 }
  33
  34 // CTortoiseGitBlameData construction/destruction
  35
  36 CTortoiseGitBlameData::CTortoiseGitBlameData()
  37         : m_encode(-1)
  38 {
  39 }
  40
  41 CTortoiseGitBlameData::~CTortoiseGitBlameData()
  42 {
  43 }
  44
  45 int CTortoiseGitBlameData::GetEncode(unsigned char *buff, int size, int *bomoffset)
  46 {
  47         CFileTextLines textlines;
  48         CFileTextLines::UnicodeType type = textlines.CheckUnicodeType(buff, size);
  49
  50         if (type == CFileTextLines::UTF8BOM)
  51         {
  52                 *bomoffset = 3;
  53                 return CP_UTF8;
  54         }
  55         if (type == CFileTextLines::UTF8)
  56                 return CP_UTF8;
  57
  58         if (type == CFileTextLines::UTF16_LE)
  59                 return 1200;
  60         if (type == CFileTextLines::UTF16_LEBOM)
  61         {
  62                 *bomoffset = 2;
  63                 return 1200;
  64         }
  65
  66         if (type == CFileTextLines::UTF16_BE)
  67                 return 1201;
  68         if (type == CFileTextLines::UTF16_BEBOM)
  69         {
  70                 *bomoffset = 2;
  71                 return 1201;
  72         }
  73
  74         return GetACP();
  75 }
  76
  77 int CTortoiseGitBlameData::GetEncode(int *bomoffset)
  78 {
  79         int encoding = 0;
  80         BYTE_VECTOR rawAll;
  81         for (const auto& rawBytes : m_RawLines)
  82                 rawAll.append(rawBytes.data(), rawBytes.size());
  83         encoding = GetEncode(rawAll.data(), (int)rawAll.size(), bomoffset);
  84         return encoding;
  85 }
  86
  87 void CTortoiseGitBlameData::ParseBlameOutput(BYTE_VECTOR &data, CGitHashMap & HashToRev, DWORD dateFormat, bool bRelativeTimes)
  88 {
  89         std::unordered_map<CGitHash, CString> hashToFilename;
  90
  91         std::vector<CGitHash>           hashes;
  92         std::vector<int>                        originalLineNumbers;
  93         std::vector<CString>            filenames;
  94         std::vector<BYTE_VECTOR>        rawLines;
  95         std::vector<CString>            authors;
  96         std::vector<CString>            dates;
  97
  98         CGitHash hash;
  99         int originalLineNumber = 0;
 100         int numberOfSubsequentLines = 0;
 101         CString filename;
 102
 103         size_t pos = 0;
 104         bool expectHash = true;
 105         while (pos < data.size())
 106         {
 107                 if (data[pos] == 0)
 108                 {
 109                         ++pos;
 110                         continue;
 111                 }
 112
 113                 size_t lineBegin = pos;
 114                 size_t lineEnd = data.find('\n', lineBegin);
 115                 if (lineEnd == BYTE_VECTOR::npos)
 116                         lineEnd = data.size();
 117
 118                 if (lineEnd > lineBegin)
 119                 {
 120                         if (data[lineBegin] != '\t')
 121                         {
 122                                 if (expectHash)
 123                                 {
 124                                         expectHash = false;
 125                                         if (lineEnd - lineBegin > 2 * GIT_HASH_SIZE)
 126                                         {
 127                                                 hash.ConvertFromStrA((char*)&data[lineBegin]);
 128
 129                                                 size_t hashEnd = lineBegin + 2 * GIT_HASH_SIZE;
 130                                                 size_t originalLineNumberBegin = hashEnd + 1;
 131                                                 size_t originalLineNumberEnd = data.find(' ', originalLineNumberBegin);
 132                                                 if (originalLineNumberEnd != BYTE_VECTOR::npos)
 133                                                 {
 134                                                         originalLineNumber = atoi(CStringA((LPCSTR)&data[originalLineNumberBegin], (int)(originalLineNumberEnd - originalLineNumberBegin)));
 135                                                         size_t finalLineNumberBegin = originalLineNumberEnd + 1;
 136                                                         size_t finalLineNumberEnd = (numberOfSubsequentLines == 0) ? data.find(' ', finalLineNumberBegin) : lineEnd;
 137                                                         if (finalLineNumberEnd != BYTE_VECTOR::npos)
 138                                                         {
 139                                                                 if (numberOfSubsequentLines == 0)
 140                                                                 {
 141                                                                         size_t numberOfSubsequentLinesBegin = finalLineNumberEnd + 1;
 142                                                                         size_t numberOfSubsequentLinesEnd = lineEnd;
 143                                                                         numberOfSubsequentLines = atoi(CStringA((LPCSTR)&data[numberOfSubsequentLinesBegin], (int)(numberOfSubsequentLinesEnd - numberOfSubsequentLinesBegin)));
 144                                                                 }
 145                                                         }
 146                                                         else
 147                                                         {
 148                                                                 // parse error
 149                                                                 numberOfSubsequentLines = 0;
 150                                                         }
 151                                                 }
 152                                                 else
 153                                                 {
 154                                                         // parse error
 155                                                         numberOfSubsequentLines = 0;
 156                                                 }
 157
 158                                                 auto it = hashToFilename.find(hash);
 159                                                 if (it != hashToFilename.end())
 160                                                         filename = it->second;
 161                                                 else
 162                                                         filename.Empty();
 163                                         }
 164                                         else
 165                                         {
 166                                                 // parse error
 167                                                 numberOfSubsequentLines = 0;
 168                                         }
 169                                 }
 170                                 else
 171                                 {
 172                                         size_t tokenBegin = lineBegin;
 173                                         size_t tokenEnd = data.find(' ', tokenBegin);
 174                                         if (tokenEnd != BYTE_VECTOR::npos)
 175                                         {
 176                                                 if (!strncmp("filename", (const char*)&data[tokenBegin], tokenEnd - tokenBegin))
 177                                                 {
 178                                                         size_t filenameBegin = tokenEnd + 1;
 179                                                         size_t filenameEnd = lineEnd;
 180                                                         CStringA filenameA = CStringA((LPCSTR)&data[filenameBegin], (int)(filenameEnd - filenameBegin));
 181                                                         filename = UnquoteFilename(filenameA);
 182                                                         auto r = hashToFilename.emplace(hash, filename);
 183                                                         if (!r.second)
 184                                                         {
 185                                                                 r.first->second = filename;
 186                                                         }
 187                                                 }
 188                                         }
 189                                 }
 190                         }
 191                         else
 192                         {
 193                                 expectHash = true;
 194                                 // remove <TAB> at start
 195                                 BYTE_VECTOR line;
 196                                 if (lineEnd - 1 > lineBegin)
 197                                         line.append(&data[lineBegin + 1], lineEnd-lineBegin - 1);
 198
 199                                 while (!line.empty() && line[line.size() - 1] == 13)
 200                                         line.pop_back();
 201
 202                                 hashes.push_back(hash);
 203                                 filenames.push_back(filename);
 204                                 originalLineNumbers.push_back(originalLineNumber);
 205                                 rawLines.push_back(line);
 206                                 --numberOfSubsequentLines;
 207                         }
 208                 }
 209                 pos = lineEnd + 1;
 210         }
 211
 212         for (const auto& hash2 : hashes)
 213         {
 214                 CString err;
 215                 GitRev* pRev = GetRevForHash(HashToRev, hash2, &err);
 216                 if (pRev)
 217                 {
 218                         authors.push_back(pRev->GetAuthorName());
 219                         dates.push_back(CLoglistUtils::FormatDateAndTime(pRev->GetAuthorDate(), dateFormat, true, bRelativeTimes));
 220                 }
 221                 else
 222                 {
 223                         MessageBox(nullptr, err, L"TortoiseGit", MB_ICONERROR);
 224                         authors.emplace_back();
 225                         dates.emplace_back();
 226                 }
 227         }
 228
 229         m_Hash.swap(hashes);
 230         m_OriginalLineNumbers.swap(originalLineNumbers);
 231         m_Filenames.swap(filenames);
 232         m_RawLines.swap(rawLines);
 233
 234         m_Authors.swap(authors);
 235         m_Dates.swap(dates);
 236         // reset detected and applied encoding
 237         m_encode = -1;
 238         m_Utf8Lines.clear();
 239 }
 240
 241 int CTortoiseGitBlameData::UpdateEncoding(int encode)
 242 {
 243         int encoding = encode;
 244         int bomoffset = 0;
 245         if (encoding==0)
 246         {
 247                 BYTE_VECTOR all;
 248                 for (const auto& rawLine : m_RawLines)
 249                 {
 250                         if (!rawLine.empty())
 251                                 all.append(rawLine.data(), rawLine.size());
 252                 }
 253                 encoding = GetEncode(all.data(), (int)all.size(), &bomoffset);
 254         }
 255
 256         if (encoding != m_encode)
 257         {
 258                 m_encode = encoding;
 259
 260                 m_Utf8Lines.resize(m_RawLines.size());
 261                 for (size_t i_Lines = 0; i_Lines < m_RawLines.size(); ++i_Lines)
 262                 {
 263                         const BYTE_VECTOR& rawLine = m_RawLines[i_Lines];
 264
 265                         int linebomoffset = 0;
 266                         CStringA lineUtf8;
 267                         lineUtf8.Empty();
 268
 269                         if (!rawLine.empty())
 270                         {
 271                                 if (encoding == 1201)
 272                                 {
 273                                         CString line;
 274                                         int size = (int)((rawLine.size() - linebomoffset) / 2);
 275                                         TCHAR *buffer = line.GetBuffer(size);
 276                                         memcpy(buffer, &rawLine[linebomoffset], sizeof(TCHAR) * size);
 277                                         // swap the bytes to little-endian order to get proper strings in wchar_t format
 278                                         wchar_t * pSwapBuf = buffer;
 279                                         for (int i = 0; i < size; ++i)
 280                                         {
 281                                                 *pSwapBuf = WideCharSwap2(*pSwapBuf);
 282                                                 ++pSwapBuf;
 283                                         }
 284                                         line.ReleaseBuffer();
 285
 286                                         lineUtf8 = CUnicodeUtils::GetUTF8(line);
 287                                 }
 288                                 else if (encoding == 1200)
 289                                 {
 290                                         CString line;
 291                                         // the first bomoffset is 2, after that it's 1 (see issue #920)
 292                                         // also: don't set bomoffset if called from Encodings menu (i.e. start == 42 and bomoffset == 0); bomoffset gets only set if autodetected
 293                                         if (linebomoffset == 0 && i_Lines != 0)
 294                                         {
 295                                                 linebomoffset = 1;
 296                                         }
 297                                         int size = (int)((rawLine.size() - linebomoffset) / 2);
 298                                         TCHAR *buffer = line.GetBuffer(size);
 299                                         memcpy(buffer, &rawLine[linebomoffset], sizeof(TCHAR) * size);
 300                                         line.ReleaseBuffer();
 301
 302                                         lineUtf8 = CUnicodeUtils::GetUTF8(line);
 303                                 }
 304                                 else if (encoding == CP_UTF8)
 305                                         lineUtf8 = CStringA((LPCSTR)&rawLine[linebomoffset], (int)(rawLine.size() - linebomoffset));
 306                                 else
 307                                 {
 308                                         CString line = CUnicodeUtils::GetUnicode(CStringA((LPCSTR)&rawLine[linebomoffset], (int)(rawLine.size() - linebomoffset)), encoding);
 309                                         lineUtf8 = CUnicodeUtils::GetUTF8(line);
 310                                 }
 311                         }
 312
 313                         m_Utf8Lines[i_Lines] = lineUtf8;
 314                         linebomoffset = 0;
 315                 }
 316         }
 317         return encoding;
 318 }
 319
 320 int CTortoiseGitBlameData::FindNextLine(CGitHash& CommitHash, int line, bool bUpOrDown)
 321 {
 322         int startline = line;
 323         bool findNoMatch = false;
 324         while (line >= 0 && line < (int)m_Hash.size())
 325         {
 326                 if (m_Hash[line] != CommitHash)
 327                         findNoMatch = true;
 328
 329                 if (m_Hash[line] == CommitHash && findNoMatch)
 330                 {
 331                         if (line == startline + 2)
 332                                 findNoMatch = false;
 333                         else
 334                         {
 335                                 if (bUpOrDown)
 336                                         line = FindFirstLineInBlock(CommitHash, line);
 337                                 return line;
 338                         }
 339                 }
 340                 if (bUpOrDown)
 341                         --line;
 342                 else
 343                         ++line;
 344         }
 345         return -1;
 346 }
 347
 348 static int FindAsciiLower(const CStringA &str, const CStringA &find)
 349 {
 350         if (find.IsEmpty())
 351                 return 0;
 352
 353         for (int i = 0; i < str.GetLength(); ++i)
 354         {
 355                 char c = str[i];
 356                 c += (c >= 'A' && c <= 'Z') ? 32 : 0;
 357                 if (c == find[0])
 358                 {
 359                         bool diff = false;
 360                         int k = 1;
 361                         for (int j = i + 1; j < str.GetLength() && k < find.GetLength(); ++j, ++k)
 362                         {
 363                                 char d = str[j];
 364                                 d += (d >= 'A' && d <= 'Z') ? 32 : 0;
 365                                 if (d != find[k])
 366                                 {
 367                                         diff = true;
 368                                         break;
 369                                 }
 370                         }
 371
 372                         if (!diff && k == find.GetLength())
 373                                 return i;
 374                 }
 375         }
 376
 377         return -1;
 378 }
 379
 380 static int FindUtf8Lower(const CStringA& strA, bool allAscii, const CString &findW, const CStringA &findA)
 381 {
 382         if (allAscii)
 383                 return FindAsciiLower(strA, findA);
 384
 385         CString strW = CUnicodeUtils::GetUnicode(strA);
 386         return strW.MakeLower().Find(findW);
 387 }
 388
 389 int CTortoiseGitBlameData::FindFirstLineWrapAround(SearchDirection direction, const CString& what, int line, bool bCaseSensitive, std::function<void()> wraparound)
 390 {
 391         bool allAscii = true;
 392         for (int i = 0; i < what.GetLength(); ++i)
 393         {
 394                 if (what[i] > 0x7f)
 395                 {
 396                         allAscii = false;
 397                         break;
 398                 }
 399         }
 400         CString whatNormalized(what);
 401         if (!bCaseSensitive)
 402                 whatNormalized.MakeLower();
 403
 404         CStringA whatNormalizedUtf8 = CUnicodeUtils::GetUTF8(whatNormalized);
 405
 406         auto numberOfLines = (int)GetNumberOfLines();
 407         if (numberOfLines == 0)
 408                 return -1;
 409         int i = line;
 410         if (direction == SearchPrevious)
 411         {
 412                 i -= 2;
 413                 if (i < 0)
 414                         i = numberOfLines - 1;
 415         }
 416         else if (line < 0 || line + 1 >= numberOfLines)
 417                 i = 0;
 418
 419         do
 420         {
 421                 if (bCaseSensitive)
 422                 {
 423                         if (m_Authors[i].Find(whatNormalized) >= 0)
 424                                 return i;
 425                         else if (m_Utf8Lines[i].Find(whatNormalizedUtf8) >=0)
 426                                 return i;
 427                 }
 428                 else
 429                 {
 430                         if (CString(m_Authors[i]).MakeLower().Find(whatNormalized) >= 0)
 431                                 return i;
 432                         else if (FindUtf8Lower(m_Utf8Lines[i], allAscii, whatNormalized, whatNormalizedUtf8) >= 0)
 433                                 return i;
 434                 }
 435
 436                 if (direction == SearchNext)
 437                 {
 438                         ++i;
 439                         if (i >= numberOfLines)
 440                         {
 441                                 i = 0;
 442                                 if (wraparound)
 443                                         wraparound();
 444                         }
 445                 }
 446                 else if (direction == SearchPrevious)
 447                 {
 448                         --i;
 449                         if (i < 0)
 450                         {
 451                                 i = numberOfLines - 2;
 452                                 if (wraparound)
 453                                         wraparound();
 454                         }
 455                 }
 456         } while (i != line);
 457
 458         return -1;
 459 }
 460
 461 bool CTortoiseGitBlameData::ContainsOnlyFilename(const CString &filename) const
 462 {
 463         for (const auto& name : m_Filenames)
 464         {
 465                 if (filename != name)
 466                         return false;
 467         }
 468         return true;
 469 }
 470
 471 GitRevLoglist* CTortoiseGitBlameData::GetRevForHash(CGitHashMap& HashToRev, const CGitHash& hash, CString* err)
 472 {
 473         auto it = HashToRev.find(hash);
 474         if (it == HashToRev.end())
 475         {
 476                 GitRevLoglist rev;
 477                 if (rev.GetCommitFromHash(hash))
 478                 {
 479                         *err = rev.GetLastErr();
 480                         return nullptr;
 481                 }
 482                 it = HashToRev.emplace(hash, rev).first;
 483         }
 484         return &(it->second);
 485 }
 486
 487 CString CTortoiseGitBlameData::UnquoteFilename(CStringA& s)
 488 {
 489         if (s[0] == '"')
 490         {
 491                 CStringA ret;
 492                 int i_size = s.GetLength();
 493                 bool isEscaped = false;
 494                 for (int i = 1; i < i_size; ++i)
 495                 {
 496                         char c = s[i];
 497                         if (isEscaped)
 498                         {
 499                                 if (c >= '0' && c <= '3')
 500                                 {
 501                                         if (i + 2 < i_size)
 502                                         {
 503                                                 c = (((c - '0') & 03) << 6) | (((s[i + 1] - '0') & 07) << 3) | ((s[i + 2] - '0') & 07);
 504                                                 i += 2;
 505                                                 ret += c;
 506                                         }
 507                                 }
 508                                 else
 509                                 {
 510                                         switch (c)
 511                                         {
 512                                         case 'a' : c = '\a'; break;
 513                                         case 'b' : c = '\b'; break;
 514                                         case 't' : c = '\t'; break;
 515                                         case 'n' : c = '\n'; break;
 516                                         case 'v' : c = '\v'; break;
 517                                         case 'f' : c = '\f'; break;
 518                                         case 'r' : c = '\r'; break;
 519                                         }
 520                                         ret += c;
 521                                 }
 522                                 isEscaped = false;
 523                         }
 524                         else
 525                         {
 526                                 if (c == '\\')
 527                                 {
 528                                         isEscaped = true;
 529                                 }
 530                                 else if(c == '"')
 531                                 {
 532                                         break;
 533                                 }
 534                                 else
 535                                 {
 536                                         ret += c;
 537                                 }
 538                         }
 539                 }
 540                 return CUnicodeUtils::GetUnicode(ret);
 541         }
 542         else
 543                 return CUnicodeUtils::GetUnicode(s);
 544 }