src/TortoiseGitBlame/TortoiseGitBlameData.cpp

   1 // TortoiseGitBlame - a Viewer for Git Blames
   2
   3 // Copyright (C) 2008-2016 - TortoiseGit
   4 // Copyright (C) 2003 Don HO <donho@altern.org>
   5
   6 // This program is free software; you can redistribute it and/or
   7 // modify it under the terms of the GNU General Public License
   8 // as published by the Free Software Foundation; either version 2
   9 // of the License, or (at your option) any later version.
  10
  11 // This program is distributed in the hope that it will be useful,
  12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 // GNU General Public License for more details.
  15
  16 // You should have received a copy of the GNU General Public License
  17 // along with this program; if not, write to the Free Software Foundation,
  18 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  19
  20 // CTortoiseGitBlameData.cpp : implementation of the CTortoiseGitBlameData class
  21 //
  22
  23 #include "stdafx.h"
  24 #include "TortoiseGitBlameData.h"
  25 #include "LoglistUtils.h"
  26 #include "FileTextLines.h"
  27 #include "UnicodeUtils.h"
  28
  29 wchar_t WideCharSwap2(wchar_t nValue)
  30 {
  31         return (((nValue>> 8)) | (nValue << 8));
  32 }
  33
  34 // CTortoiseGitBlameData construction/destruction
  35
  36 CTortoiseGitBlameData::CTortoiseGitBlameData()
  37 {
  38         m_encode = -1;
  39 }
  40
  41 CTortoiseGitBlameData::~CTortoiseGitBlameData()
  42 {
  43 }
  44
  45 int CTortoiseGitBlameData::GetEncode(unsigned char *buff, int size, int *bomoffset)
  46 {
  47         CFileTextLines textlines;
  48         CFileTextLines::UnicodeType type = textlines.CheckUnicodeType(buff, size);
  49
  50         if (type == CFileTextLines::UTF8BOM)
  51         {
  52                 *bomoffset = 3;
  53                 return CP_UTF8;
  54         }
  55         if (type == CFileTextLines::UTF8)
  56                 return CP_UTF8;
  57
  58         if (type == CFileTextLines::UTF16_LE)
  59                 return 1200;
  60         if (type == CFileTextLines::UTF16_LEBOM)
  61         {
  62                 *bomoffset = 2;
  63                 return 1200;
  64         }
  65
  66         if (type == CFileTextLines::UTF16_BE)
  67                 return 1201;
  68         if (type == CFileTextLines::UTF16_BEBOM)
  69         {
  70                 *bomoffset = 2;
  71                 return 1201;
  72         }
  73
  74         return GetACP();
  75 }
  76
  77 int CTortoiseGitBlameData::GetEncode(int *bomoffset)
  78 {
  79         int encoding = 0;
  80         BYTE_VECTOR rawAll;
  81         for (const auto& rawBytes : m_RawLines)
  82                 rawAll.append(&rawBytes[0], rawBytes.size());
  83         encoding = GetEncode(&rawAll[0], (int)rawAll.size(), bomoffset);
  84         return encoding;
  85 }
  86
  87 void CTortoiseGitBlameData::ParseBlameOutput(BYTE_VECTOR &data, CGitHashMap & HashToRev, DWORD dateFormat, bool bRelativeTimes)
  88 {
  89         std::map<CGitHash, CString> hashToFilename;
  90
  91         std::vector<CGitHash>           hashes;
  92         std::vector<int>                        originalLineNumbers;
  93         std::vector<CString>            filenames;
  94         std::vector<BYTE_VECTOR>        rawLines;
  95         std::vector<CString>            authors;
  96         std::vector<CString>            dates;
  97
  98         CGitHash hash;
  99         int originalLineNumber = 0;
 100         int finalLineNumber = 0;
 101         int numberOfSubsequentLines = 0;
 102         CString filename;
 103
 104         size_t pos = 0;
 105         bool expectHash = true;
 106         while (pos < data.size())
 107         {
 108                 if (data[pos] == 0)
 109                 {
 110                         ++pos;
 111                         continue;
 112                 }
 113
 114                 size_t lineBegin = pos;
 115                 size_t lineEnd = data.find('\n', lineBegin);
 116                 if (lineEnd == BYTE_VECTOR::npos)
 117                         lineEnd = data.size();
 118
 119                 if (lineEnd > lineBegin)
 120                 {
 121                         if (data[lineBegin] != '\t')
 122                         {
 123                                 if (expectHash)
 124                                 {
 125                                         expectHash = false;
 126                                         if (lineEnd - lineBegin > 40)
 127                                         {
 128                                                 hash.ConvertFromStrA((char*)&data[lineBegin]);
 129
 130                                                 size_t hashEnd = lineBegin + 40;
 131                                                 size_t originalLineNumberBegin = hashEnd + 1;
 132                                                 size_t originalLineNumberEnd = data.find(' ', originalLineNumberBegin);
 133                                                 if (originalLineNumberEnd != BYTE_VECTOR::npos)
 134                                                 {
 135                                                         originalLineNumber = atoi(CStringA((LPCSTR)&data[originalLineNumberBegin], (int)(originalLineNumberEnd - originalLineNumberBegin)));
 136                                                         size_t finalLineNumberBegin = originalLineNumberEnd + 1;
 137                                                         size_t finalLineNumberEnd = (numberOfSubsequentLines == 0) ? data.find(' ', finalLineNumberBegin) : lineEnd;
 138                                                         if (finalLineNumberEnd != BYTE_VECTOR::npos)
 139                                                         {
 140                                                                 finalLineNumber = atoi(CStringA((LPCSTR)&data[finalLineNumberBegin], (int)(finalLineNumberEnd - finalLineNumberBegin)));
 141                                                                 if (numberOfSubsequentLines == 0)
 142                                                                 {
 143                                                                         size_t numberOfSubsequentLinesBegin = finalLineNumberEnd + 1;
 144                                                                         size_t numberOfSubsequentLinesEnd = lineEnd;
 145                                                                         numberOfSubsequentLines = atoi(CStringA((LPCSTR)&data[numberOfSubsequentLinesBegin], (int)(numberOfSubsequentLinesEnd - numberOfSubsequentLinesBegin)));
 146                                                                 }
 147                                                         }
 148                                                         else
 149                                                         {
 150                                                                 // parse error
 151                                                                 finalLineNumber = 0;
 152                                                                 numberOfSubsequentLines = 0;
 153                                                         }
 154                                                 }
 155                                                 else
 156                                                 {
 157                                                         // parse error
 158                                                         finalLineNumber = 0;
 159                                                         numberOfSubsequentLines = 0;
 160                                                 }
 161
 162                                                 auto it = hashToFilename.find(hash);
 163                                                 if (it != hashToFilename.end())
 164                                                         filename = it->second;
 165                                                 else
 166                                                         filename.Empty();
 167                                         }
 168                                         else
 169                                         {
 170                                                 // parse error
 171                                                 finalLineNumber = 0;
 172                                                 numberOfSubsequentLines = 0;
 173                                         }
 174                                 }
 175                                 else
 176                                 {
 177                                         size_t tokenBegin = lineBegin;
 178                                         size_t tokenEnd = data.find(' ', tokenBegin);
 179                                         if (tokenEnd != BYTE_VECTOR::npos)
 180                                         {
 181                                                 if (!strncmp("filename", (const char*)&data[tokenBegin], tokenEnd - tokenBegin))
 182                                                 {
 183                                                         size_t filenameBegin = tokenEnd + 1;
 184                                                         size_t filenameEnd = lineEnd;
 185                                                         CStringA filenameA = CStringA((LPCSTR)&data[filenameBegin], (int)(filenameEnd - filenameBegin));
 186                                                         filename = UnquoteFilename(filenameA);
 187                                                         auto r = hashToFilename.emplace(hash, filename);
 188                                                         if (!r.second)
 189                                                         {
 190                                                                 r.first->second = filename;
 191                                                         }
 192                                                 }
 193                                         }
 194                                 }
 195                         }
 196                         else
 197                         {
 198                                 expectHash = true;
 199                                 // remove <TAB> at start
 200                                 BYTE_VECTOR line;
 201                                 if (lineEnd - 1 > lineBegin)
 202                                         line.append(&data[lineBegin + 1], lineEnd-lineBegin - 1);
 203
 204                                 while (!line.empty() && line[line.size() - 1] == 13)
 205                                         line.pop_back();
 206
 207                                 hashes.push_back(hash);
 208                                 filenames.push_back(filename);
 209                                 originalLineNumbers.push_back(originalLineNumber);
 210                                 rawLines.push_back(line);
 211                                 --numberOfSubsequentLines;
 212                         }
 213                 }
 214                 pos = lineEnd + 1;
 215         }
 216
 217         for (const auto& hash2 : hashes)
 218         {
 219                 CString err;
 220                 GitRev* pRev = GetRevForHash(HashToRev, hash2, &err);
 221                 if (pRev)
 222                 {
 223                         authors.push_back(pRev->GetAuthorName());
 224                         dates.push_back(CLoglistUtils::FormatDateAndTime(pRev->GetAuthorDate(), dateFormat, true, bRelativeTimes));
 225                 }
 226                 else
 227                 {
 228                         MessageBox(nullptr, err, L"TortoiseGit", MB_ICONERROR);
 229                         authors.emplace_back();
 230                         dates.emplace_back();
 231                 }
 232         }
 233
 234         m_Hash.swap(hashes);
 235         m_OriginalLineNumbers.swap(originalLineNumbers);
 236         m_Filenames.swap(filenames);
 237         m_RawLines.swap(rawLines);
 238
 239         m_Authors.swap(authors);
 240         m_Dates.swap(dates);
 241         // reset detected and applied encoding
 242         m_encode = -1;
 243         m_Utf8Lines.clear();
 244 }
 245
 246 int CTortoiseGitBlameData::UpdateEncoding(int encode)
 247 {
 248         int encoding = encode;
 249         int bomoffset = 0;
 250         if (encoding==0)
 251         {
 252                 BYTE_VECTOR all;
 253                 for (const auto& rawLine : m_RawLines)
 254                 {
 255                         if (!rawLine.empty())
 256                                 all.append(&rawLine[0], rawLine.size());
 257                 }
 258                 encoding = GetEncode(&all[0], (int)all.size(), &bomoffset);
 259         }
 260
 261         if (encoding != m_encode)
 262         {
 263                 m_encode = encoding;
 264
 265                 m_Utf8Lines.resize(m_RawLines.size());
 266                 for (size_t i_Lines = 0; i_Lines < m_RawLines.size(); ++i_Lines)
 267                 {
 268                         const BYTE_VECTOR& rawLine = m_RawLines[i_Lines];
 269
 270                         int linebomoffset = 0;
 271                         CStringA lineUtf8;
 272                         lineUtf8.Empty();
 273
 274                         if (!rawLine.empty())
 275                         {
 276                                 if (encoding == 1201)
 277                                 {
 278                                         CString line;
 279                                         int size = (int)((rawLine.size() - linebomoffset) / 2);
 280                                         TCHAR *buffer = line.GetBuffer(size);
 281                                         memcpy(buffer, &rawLine[linebomoffset], sizeof(TCHAR) * size);
 282                                         // swap the bytes to little-endian order to get proper strings in wchar_t format
 283                                         wchar_t * pSwapBuf = buffer;
 284                                         for (int i = 0; i < size; ++i)
 285                                         {
 286                                                 *pSwapBuf = WideCharSwap2(*pSwapBuf);
 287                                                 ++pSwapBuf;
 288                                         }
 289                                         line.ReleaseBuffer();
 290
 291                                         lineUtf8 = CUnicodeUtils::GetUTF8(line);
 292                                 }
 293                                 else if (encoding == 1200)
 294                                 {
 295                                         CString line;
 296                                         // the first bomoffset is 2, after that it's 1 (see issue #920)
 297                                         // also: don't set bomoffset if called from Encodings menu (i.e. start == 42 and bomoffset == 0); bomoffset gets only set if autodetected
 298                                         if (linebomoffset == 0 && i_Lines != 0)
 299                                         {
 300                                                 linebomoffset = 1;
 301                                         }
 302                                         int size = (int)((rawLine.size() - linebomoffset) / 2);
 303                                         TCHAR *buffer = line.GetBuffer(size);
 304                                         memcpy(buffer, &rawLine[linebomoffset], sizeof(TCHAR) * size);
 305                                         line.ReleaseBuffer();
 306
 307                                         lineUtf8 = CUnicodeUtils::GetUTF8(line);
 308                                 }
 309                                 else if (encoding == CP_UTF8)
 310                                         lineUtf8 = CStringA((LPCSTR)&rawLine[linebomoffset], (int)(rawLine.size() - linebomoffset));
 311                                 else
 312                                 {
 313                                         CString line = CUnicodeUtils::GetUnicode(CStringA((LPCSTR)&rawLine[linebomoffset], (int)(rawLine.size() - linebomoffset)), encoding);
 314                                         lineUtf8 = CUnicodeUtils::GetUTF8(line);
 315                                 }
 316                         }
 317
 318                         m_Utf8Lines[i_Lines] = lineUtf8;
 319                         linebomoffset = 0;
 320                 }
 321         }
 322         return encoding;
 323 }
 324
 325 int CTortoiseGitBlameData::FindNextLine(CGitHash& CommitHash, int line, bool bUpOrDown)
 326 {
 327         int startline = line;
 328         bool findNoMatch = false;
 329         while (line >= 0 && line < (int)m_Hash.size())
 330         {
 331                 if (m_Hash[line] != CommitHash)
 332                         findNoMatch = true;
 333
 334                 if (m_Hash[line] == CommitHash && findNoMatch)
 335                 {
 336                         if (line == startline + 2)
 337                                 findNoMatch = false;
 338                         else
 339                         {
 340                                 if (bUpOrDown)
 341                                         line = FindFirstLineInBlock(CommitHash, line);
 342                                 return line;
 343                         }
 344                 }
 345                 if (bUpOrDown)
 346                         --line;
 347                 else
 348                         ++line;
 349         }
 350         return -1;
 351 }
 352
 353 static int FindAsciiLower(const CStringA &str, const CStringA &find)
 354 {
 355         if (find.IsEmpty())
 356                 return 0;
 357
 358         for (int i = 0; i < str.GetLength(); ++i)
 359         {
 360                 char c = str[i];
 361                 c += (c >= 'A' && c <= 'Z') ? 32 : 0;
 362                 if (c == find[0])
 363                 {
 364                         bool diff = false;
 365                         int k = 1;
 366                         for (int j = i + 1; j < str.GetLength() && k < find.GetLength(); ++j, ++k)
 367                         {
 368                                 char d = str[j];
 369                                 d += (d >= 'A' && d <= 'Z') ? 32 : 0;
 370                                 if (d != find[k])
 371                                 {
 372                                         diff = true;
 373                                         break;
 374                                 }
 375                         }
 376
 377                         if (!diff && k == find.GetLength())
 378                                 return i;
 379                 }
 380         }
 381
 382         return -1;
 383 }
 384
 385 static int FindUtf8Lower(const CStringA& strA, bool allAscii, const CString &findW, const CStringA &findA)
 386 {
 387         if (allAscii)
 388                 return FindAsciiLower(strA, findA);
 389
 390         CString strW = CUnicodeUtils::GetUnicode(strA);
 391         return strW.MakeLower().Find(findW);
 392 }
 393
 394 int CTortoiseGitBlameData::FindFirstLineWrapAround(SearchDirection direction, const CString& what, int line, bool bCaseSensitive)
 395 {
 396         bool allAscii = true;
 397         for (int i = 0; i < what.GetLength(); ++i)
 398         {
 399                 if (what[i] > 0x7f)
 400                 {
 401                         allAscii = false;
 402                         break;
 403                 }
 404         }
 405         CString whatNormalized(what);
 406         if (!bCaseSensitive)
 407                 whatNormalized.MakeLower();
 408
 409         CStringA whatNormalizedUtf8 = CUnicodeUtils::GetUTF8(whatNormalized);
 410
 411         int numberOfLines = GetNumberOfLines();
 412         int i = line;
 413         if (direction == SearchPrevious)
 414         {
 415                 i -= 2;
 416                 if (i < 0)
 417                         i = numberOfLines - 1;
 418         }
 419         else if (line < 0 || line + 1 >= numberOfLines)
 420                 i = 0;
 421
 422         do
 423         {
 424                 if (bCaseSensitive)
 425                 {
 426                         if (m_Authors[i].Find(whatNormalized) >= 0)
 427                                 return i;
 428                         else if (m_Utf8Lines[i].Find(whatNormalizedUtf8) >=0)
 429                                 return i;
 430                 }
 431                 else
 432                 {
 433                         if (CString(m_Authors[i]).MakeLower().Find(whatNormalized) >= 0)
 434                                 return i;
 435                         else if (FindUtf8Lower(m_Utf8Lines[i], allAscii, whatNormalized, whatNormalizedUtf8) >= 0)
 436                                 return i;
 437                 }
 438
 439                 if (direction == SearchNext)
 440                 {
 441                         ++i;
 442                         if (i >= numberOfLines)
 443                                 i = 0;
 444                 }
 445                 else if (direction == SearchPrevious)
 446                 {
 447                         --i;
 448                         if (i < 0)
 449                                 i = numberOfLines - 2;
 450                 }
 451         } while (i != line);
 452
 453         return -1;
 454 }
 455
 456 bool CTortoiseGitBlameData::ContainsOnlyFilename(const CString &filename) const
 457 {
 458         for (const auto& name : m_Filenames)
 459         {
 460                 if (filename != name)
 461                         return false;
 462         }
 463         return true;
 464 }
 465
 466 GitRevLoglist* CTortoiseGitBlameData::GetRevForHash(CGitHashMap& HashToRev, const CGitHash& hash, CString* err)
 467 {
 468         auto it = HashToRev.find(hash);
 469         if (it == HashToRev.end())
 470         {
 471                 GitRevLoglist rev;
 472                 if (rev.GetCommitFromHash(hash))
 473                 {
 474                         *err = rev.GetLastErr();
 475                         return nullptr;
 476                 }
 477                 it = HashToRev.emplace(hash, rev).first;
 478         }
 479         return &(it->second);
 480 }
 481
 482 CString CTortoiseGitBlameData::UnquoteFilename(CStringA& s)
 483 {
 484         if (s[0] == '"')
 485         {
 486                 CStringA ret;
 487                 int i_size = s.GetLength();
 488                 bool isEscaped = false;
 489                 for (int i = 1; i < i_size; ++i)
 490                 {
 491                         char c = s[i];
 492                         if (isEscaped)
 493                         {
 494                                 if (c >= '0' && c <= '3')
 495                                 {
 496                                         if (i + 2 < i_size)
 497                                         {
 498                                                 c = (((c - '0') & 03) << 6) | (((s[i + 1] - '0') & 07) << 3) | ((s[i + 2] - '0') & 07);
 499                                                 i += 2;
 500                                                 ret += c;
 501                                         }
 502                                 }
 503                                 else
 504                                 {
 505                                         switch (c)
 506                                         {
 507                                         case 'a' : c = '\a'; break;
 508                                         case 'b' : c = '\b'; break;
 509                                         case 't' : c = '\t'; break;
 510                                         case 'n' : c = '\n'; break;
 511                                         case 'v' : c = '\v'; break;
 512                                         case 'f' : c = '\f'; break;
 513                                         case 'r' : c = '\r'; break;
 514                                         }
 515                                         ret += c;
 516                                 }
 517                                 isEscaped = false;
 518                         }
 519                         else
 520                         {
 521                                 if (c == '\\')
 522                                 {
 523                                         isEscaped = true;
 524                                 }
 525                                 else if(c == '"')
 526                                 {
 527                                         break;
 528                                 }
 529                                 else
 530                                 {
 531                                         ret += c;
 532                                 }
 533                         }
 534                 }
 535                 return CUnicodeUtils::GetUnicode(ret);
 536         }
 537         else
 538                 return CUnicodeUtils::GetUnicode(s);
 539 }