src/TortoiseGitBlame/TortoiseGitBlameData.cpp

   1 // TortoiseGitBlame - a Viewer for Git Blames
   2
   3 // Copyright (C) 2008-2013 - TortoiseGit
   4 // Copyright (C) 2010-2013 Sven Strickroth <email@cs-ware.de>
   5 // Copyright (C) 2003 Don HO <donho@altern.org>
   6
   7 // This program is free software; you can redistribute it and/or
   8 // modify it under the terms of the GNU General Public License
   9 // as published by the Free Software Foundation; either version 2
  10 // of the License, or (at your option) any later version.
  11
  12 // This program is distributed in the hope that it will be useful,
  13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 // GNU General Public License for more details.
  16
  17 // You should have received a copy of the GNU General Public License
  18 // along with this program; if not, write to the Free Software Foundation,
  19 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  20
  21 // CTortoiseGitBlameData.cpp : implementation of the CTortoiseGitBlameData class
  22 //
  23
  24 #include "stdafx.h"
  25 #include "TortoiseGitBlame.h"
  26 #include "CommonAppUtils.h"
  27 #include "TortoiseGitBlameDoc.h"
  28 #include "TortoiseGitBlameData.h"
  29 #include "MainFrm.h"
  30 #include "EditGotoDlg.h"
  31 #include "LoglistUtils.h"
  32 #include "FileTextLines.h"
  33 #include "UnicodeUtils.h"
  34 #include "MenuEncode.h"
  35 #include "gitdll.h"
  36 #include "SysInfo.h"
  37 #include "StringUtils.h"
  38 #include "TGitPath.h"
  39
  // Exchanges the two low-order bytes of a wide character, i.e. converts one
  // UTF-16 code unit between big-endian and little-endian byte order.
  // NOTE(review): written for a 16-bit wchar_t (as on Windows) - with a wider
  // wchar_t the bits shifted upwards are not cut off; confirm before porting.
  wchar_t WideCharSwap2(wchar_t nValue)
  {
      const auto movedDown = nValue >> 8; // former high byte, now in the low byte
      const auto movedUp = nValue << 8;   // former low byte, now in the high byte
      return static_cast<wchar_t>(movedDown | movedUp);
  }
  44
  45 // CTortoiseGitBlameData construction/destruction
  46
  47 CTortoiseGitBlameData::CTortoiseGitBlameData()
  48 {
  49         m_encode = -1;
  50 }
  51
  52 CTortoiseGitBlameData::~CTortoiseGitBlameData()
  53 {
  54 }
  55
  56 int CTortoiseGitBlameData::GetEncode(unsigned char *buff, int size, int *bomoffset)
  57 {
  58         CFileTextLines textlines;
  59         CFileTextLines::UnicodeType type = textlines.CheckUnicodeType(buff, size);
  60
  61         if (type == CFileTextLines::UTF8BOM)
  62         {
  63                 *bomoffset = 3;
  64                 return CP_UTF8;
  65         }
  66         if (type == CFileTextLines::UTF8)
  67                 return CP_UTF8;
  68
  69         if (type == CFileTextLines::UTF16_LE)
  70                 return 1200;
  71         if (type == CFileTextLines::UTF16_LEBOM)
  72         {
  73                 *bomoffset = 2;
  74                 return 1200;
  75         }
  76
  77         if (type == CFileTextLines::UTF16_BE)
  78                 return 1201;
  79         if (type == CFileTextLines::UTF16_BEBOM)
  80         {
  81                 *bomoffset = 2;
  82                 return 1201;
  83         }
  84
  85         return GetACP();
  86 }
  87
  88 int CTortoiseGitBlameData::GetEncode(int *bomoffset)
  89 {
  90         int encoding = 0;
  91         BYTE_VECTOR rawAll;
  92         for (auto it = m_RawLines.begin(), it_end = m_RawLines.end(); it != it_end; ++it)
  93         {
  94                 rawAll.append(&(*it)[0], it->size());
  95         }
  96         encoding = GetEncode(&rawAll[0], (int)rawAll.size(), bomoffset);
  97         return encoding;
  98 }
  99
 100 void CTortoiseGitBlameData::ParseBlameOutput(BYTE_VECTOR &data, CGitHashMap & HashToRev, DWORD dateFormat, bool bRelativeTimes)
 101 {
 102         std::map<CGitHash, CString> hashToFilename;
 103
 104         std::vector<CGitHash>           hashes;
 105         std::vector<int>                        originalLineNumbers;
 106         std::vector<CString>            filenames;
 107         std::vector<BYTE_VECTOR>        rawLines;
 108         std::vector<CString>            authors;
 109         std::vector<CString>            dates;
 110
 111         CGitHash hash;
 112         int originalLineNumber = 0;
 113         int finalLineNumber = 0;
 114         int numberOfSubsequentLines = 0;
 115         CString filename;
 116
 117         int pos = 0;
 118         bool expectHash = true;
 119         while (pos >= 0 && (size_t)pos < data.size())
 120         {
 121                 if (data[pos] == 0)
 122                         continue;
 123
 124                 int lineBegin = pos;
 125                 int lineEnd = data.findData((const BYTE*)"\n", 1, lineBegin);
 126                 if (lineEnd < 0)
 127                         lineEnd = (int)data.size();
 128
 129                 if (lineEnd > lineBegin)
 130                 {
 131                         if (data[lineBegin] != '\t')
 132                         {
 133                                 if (expectHash)
 134                                 {
 135                                         expectHash = false;
 136                                         if (lineEnd - lineBegin > 40)
 137                                         {
 138                                                 hash.ConvertFromStrA((char*)&data[lineBegin]);
 139
 140                                                 int hashEnd = lineBegin + 40;
 141                                                 int originalLineNumberBegin = hashEnd + 1;
 142                                                 int originalLineNumberEnd = data.findData((const BYTE*)" ", 1, originalLineNumberBegin);
 143                                                 if (originalLineNumberEnd >= 0)
 144                                                 {
 145                                                         originalLineNumber = atoi(CStringA((LPCSTR)&data[originalLineNumberBegin], originalLineNumberEnd - originalLineNumberBegin));
 146                                                         int finalLineNumberBegin = originalLineNumberEnd + 1;
 147                                                         int finalLineNumberEnd = (numberOfSubsequentLines == 0) ? data.findData((const BYTE*)" ", 1, finalLineNumberBegin) : lineEnd;
 148                                                         if (finalLineNumberEnd >= 0)
 149                                                         {
 150                                                                 finalLineNumber = atoi(CStringA((LPCSTR)&data[finalLineNumberBegin], finalLineNumberEnd - finalLineNumberBegin));
 151                                                                 if (numberOfSubsequentLines == 0)
 152                                                                 {
 153                                                                         int numberOfSubsequentLinesBegin = finalLineNumberEnd + 1;
 154                                                                         int numberOfSubsequentLinesEnd = lineEnd;
 155                                                                         numberOfSubsequentLines = atoi(CStringA((LPCSTR)&data[numberOfSubsequentLinesBegin], numberOfSubsequentLinesEnd - numberOfSubsequentLinesBegin));
 156                                                                 }
 157                                                         }
 158                                                         else
 159                                                         {
 160                                                                 // parse error
 161                                                                 finalLineNumber = 0;
 162                                                                 numberOfSubsequentLines = 0;
 163                                                         }
 164                                                 }
 165                                                 else
 166                                                 {
 167                                                         // parse error
 168                                                         finalLineNumber = 0;
 169                                                         numberOfSubsequentLines = 0;
 170                                                 }
 171
 172                                                 auto it = hashToFilename.find(hash);
 173                                                 if (it != hashToFilename.end())
 174                                                         filename = it->second;
 175                                                 else
 176                                                         filename.Empty();
 177                                         }
 178                                         else
 179                                         {
 180                                                 // parse error
 181                                                 finalLineNumber = 0;
 182                                                 numberOfSubsequentLines = 0;
 183                                         }
 184                                 }
 185                                 else
 186                                 {
 187                                         int tokenBegin = lineBegin;
 188                                         int tokenEnd = data.findData((const BYTE*)" ", 1, tokenBegin);
 189                                         if (tokenEnd >= 0)
 190                                         {
 191                                                 if (!strncmp("filename", (const char*)&data[tokenBegin], tokenEnd - tokenBegin))
 192                                                 {
 193                                                         int filenameBegin = tokenEnd + 1;
 194                                                         int filenameEnd = lineEnd;
 195                                                         CStringA filenameA = CStringA((LPCSTR)&data[filenameBegin], filenameEnd - filenameBegin);
 196                                                         filename = UnquoteFilename(filenameA);
 197                                                         auto r = hashToFilename.insert(std::make_pair(hash, filename));
 198                                                         if (!r.second)
 199                                                         {
 200                                                                 r.first->second = filename;
 201                                                         }
 202                                                 }
 203                                         }
 204                                 }
 205                         }
 206                         else
 207                         {
 208                                 expectHash = true;
 209                                 // remove <TAB> at start
 210                                 BYTE_VECTOR line;
 211                                 if (lineEnd - 1 > lineBegin)
 212                                         line.append(&data[lineBegin + 1], lineEnd-lineBegin - 1);
 213
 214                                 hashes.push_back(hash);
 215                                 filenames.push_back(filename);
 216                                 originalLineNumbers.push_back(originalLineNumber);
 217                                 rawLines.push_back(line);
 218                                 --numberOfSubsequentLines;
 219                         }
 220                 }
 221                 pos = lineEnd + 1;
 222         }
 223
 224         for (auto it = hashes.begin(), it_end = hashes.end(); it != it_end; ++it)
 225         {
 226                 CGitHash hash = *it;
 227                 GitRev *pRev = GetRevForHash(HashToRev, hash);
 228                 if (pRev)
 229                 {
 230                         authors.push_back(pRev->GetAuthorName());
 231                         dates.push_back(CLoglistUtils::FormatDateAndTime(pRev->GetAuthorDate(), dateFormat, true, bRelativeTimes));
 232                 }
 233                 else
 234                 {
 235                         authors.push_back(CString());
 236                         dates.push_back(CString());
 237                 }
 238         }
 239
 240         m_Hash.swap(hashes);
 241         m_OriginalLineNumbers.swap(originalLineNumbers);
 242         m_Filenames.swap(filenames);
 243         m_RawLines.swap(rawLines);
 244
 245         m_Authors.swap(authors);
 246         m_Dates.swap(dates);
 247         // reset detected and applied encoding
 248         m_encode = -1;
 249         m_Utf8Lines.clear();
 250 }
 251
 252 int CTortoiseGitBlameData::UpdateEncoding(int encode)
 253 {
 254         int encoding = encode;
 255         int bomoffset = 0;
 256         if (encoding==0)
 257         {
 258                 BYTE_VECTOR all;
 259                 for (auto it = m_RawLines.begin(); it != m_RawLines.end(); ++it)
 260                 {
 261                         if (!it->empty())
 262                                 all.append(&(*it)[0], it->size());
 263                 }
 264                 encoding = GetEncode(&all[0], (int)all.size(), &bomoffset);
 265         }
 266
 267         if (encoding != m_encode)
 268         {
 269                 m_encode = encoding;
 270
 271                 m_Utf8Lines.resize(m_RawLines.size());
 272                 for (size_t i_Lines = 0; i_Lines < m_RawLines.size(); ++i_Lines)
 273                 {
 274                         const BYTE_VECTOR& rawLine = m_RawLines[i_Lines];
 275
 276                         int bomoffset = 0;
 277                         CStringA lineUtf8;
 278                         lineUtf8.Empty();
 279
 280                         if (!rawLine.empty())
 281                         {
 282                                 if (encoding == 1201)
 283                                 {
 284                                         CString line;
 285                                         int size = (int)((rawLine.size() - bomoffset)/2);
 286                                         TCHAR *buffer = line.GetBuffer(size);
 287                                         memcpy(buffer, &rawLine[bomoffset], sizeof(TCHAR)*size);
 288                                         // swap the bytes to little-endian order to get proper strings in wchar_t format
 289                                         wchar_t * pSwapBuf = buffer;
 290                                         for (int i = 0; i < size; ++i)
 291                                         {
 292                                                 *pSwapBuf = WideCharSwap2(*pSwapBuf);
 293                                                 ++pSwapBuf;
 294                                         }
 295                                         line.ReleaseBuffer();
 296
 297                                         lineUtf8 = CUnicodeUtils::GetUTF8(line);
 298                                 }
 299                                 else if (encoding == 1200)
 300                                 {
 301                                         CString line;
 302                                         // the first bomoffset is 2, after that it's 1 (see issue #920)
 303                                         // also: don't set bomoffset if called from Encodings menu (i.e. start == 42 and bomoffset == 0); bomoffset gets only set if autodetected
 304                                         if (bomoffset == 0 && i_Lines != 0)
 305                                         {
 306                                                 bomoffset = 1;
 307                                         }
 308                                         int size = (int)((rawLine.size() - bomoffset)/2);
 309                                         TCHAR *buffer = line.GetBuffer(size);
 310                                         memcpy(buffer, &rawLine[bomoffset], sizeof(TCHAR) * size);
 311                                         line.ReleaseBuffer();
 312
 313                                         lineUtf8 = CUnicodeUtils::GetUTF8(line);
 314                                 }
 315                                 else if (encoding == CP_UTF8)
 316                                         lineUtf8 = CStringA((LPCSTR)&rawLine[bomoffset], (int)(rawLine.size() - bomoffset));
 317                                 else
 318                                 {
 319                                         CString line = CUnicodeUtils::GetUnicode(CStringA((LPCSTR)&rawLine[bomoffset], (int)(rawLine.size() - bomoffset)), encoding);
 320                                         lineUtf8 = CUnicodeUtils::GetUTF8(line);
 321                                 }
 322                         }
 323
 324                         m_Utf8Lines[i_Lines] = lineUtf8;
 325                         bomoffset = 0;
 326                 }
 327         }
 328         return encoding;
 329 }
 330
 331 int CTortoiseGitBlameData::FindNextLine(CGitHash& CommitHash, int line, bool bUpOrDown)
 332 {
 333         int startline = line;
 334         bool findNoMatch = false;
 335         while (line >= 0 && line < (int)m_Hash.size())
 336         {
 337                 if (m_Hash[line] != CommitHash)
 338                         findNoMatch = true;
 339
 340                 if (m_Hash[line] == CommitHash && findNoMatch)
 341                 {
 342                         if (line == startline + 2)
 343                                 findNoMatch = false;
 344                         else
 345                         {
 346                                 if (bUpOrDown)
 347                                         line = FindFirstLineInBlock(CommitHash, line);
 348                                 return line;
 349                         }
 350                 }
 351                 if (bUpOrDown)
 352                         --line;
 353                 else
 354                         ++line;
 355         }
 356         return -1;
 357 }
 358
 359 static int FindAsciiLower(const CStringA &str, const CStringA &find)
 360 {
 361         if (find.GetLength() == 0)
 362                 return 0;
 363
 364         for (int i = 0; i < str.GetLength(); ++i)
 365         {
 366                 char c = str[i];
 367                 c += (c >= 'A' && c <= 'Z') ? 32 : 0;
 368                 if (c == find[0])
 369                 {
 370                         bool diff = false;
 371                         int k = 1;
 372                         for (int j = i + 1; j < str.GetLength() && k < find.GetLength(); ++j, ++k)
 373                         {
 374                                 char d = str[j];
 375                                 d += (d >= 'A' && d <= 'Z') ? 32 : 0;
 376                                 if (d != find[k])
 377                                 {
 378                                         diff = true;
 379                                         break;
 380                                 }
 381                         }
 382
 383                         if (!diff && k == find.GetLength())
 384                                 return i;
 385                 }
 386         }
 387
 388         return -1;
 389 }
 390
 391 static int FindUtf8Lower(const CStringA& strA, bool allAscii, const CString &findW, const CStringA &findA)
 392 {
 393         if (allAscii)
 394                 return FindAsciiLower(strA, findA);
 395
 396         CString strW = CUnicodeUtils::GetUnicode(strA);
 397         return strW.MakeLower().Find(findW);
 398 }
 399
 400 int CTortoiseGitBlameData::FindFirstLineWrapAround(const CString& what, int line, bool bCaseSensitive)
 401 {
 402         bool allAscii = true;
 403         for (int i = 0; i < what.GetLength(); ++i)
 404         {
 405                 if (what[i] > 0x7f)
 406                 {
 407                         allAscii = false;
 408                         break;
 409                 }
 410         }
 411         CString whatNormalized(what);
 412         if (!bCaseSensitive)
 413         {
 414                 whatNormalized.MakeLower();
 415         }
 416
 417         CStringA whatNormalizedUtf8 = CUnicodeUtils::GetUTF8(whatNormalized);
 418
 419         bool bFound = false;
 420
 421         int i = line;
 422         int numberOfLines = GetNumberOfLines();
 423         if (line < 0 || line + 1 >= numberOfLines)
 424                 i = 0;
 425
 426         do
 427         {
 428                 if (bCaseSensitive)
 429                 {
 430                         if (m_Authors[i].Find(whatNormalized) >= 0)
 431                                 bFound = true;
 432                         else if (m_Utf8Lines[i].Find(whatNormalizedUtf8) >=0)
 433                                 bFound = true;
 434                 }
 435                 else
 436                 {
 437                         if (CString(m_Authors[i]).MakeLower().Find(whatNormalized) >= 0)
 438                                 bFound = true;
 439                         else if (FindUtf8Lower(m_Utf8Lines[i], allAscii, whatNormalized, whatNormalizedUtf8) >= 0)
 440                                 bFound = true;
 441                 }
 442
 443                 if(bFound)
 444                 {
 445                         break;
 446                 }
 447                 else
 448                 {
 449                         ++i;
 450                         if (i >= numberOfLines)
 451                                 i = 0;
 452                 }
 453         } while (i != line);
 454
 455         return bFound ? i : -1;
 456 }
 457
 458 bool CTortoiseGitBlameData::ContainsOnlyFilename(const CString &filename) const
 459 {
 460         for (auto it = m_Filenames.cbegin(); it != m_Filenames.cend(); ++it)
 461         {
 462                 if (filename != *it)
 463                         return false;
 464         }
 465         return true;
 466 }
 467
 468 GitRev* CTortoiseGitBlameData::GetRevForHash(CGitHashMap & HashToRev, CGitHash& hash)
 469 {
 470         auto it = HashToRev.find(hash);
 471         if (it == HashToRev.end())
 472         {
 473                 GitRev rev;
 474                 rev.GetCommitFromHash(hash);
 475                 it = HashToRev.insert(std::make_pair(hash, rev)).first;
 476         }
 477         return &(it->second);
 478 }
 479
 480 CString CTortoiseGitBlameData::UnquoteFilename(CStringA& s)
 481 {
 482         if (s[0] == '"')
 483         {
 484                 CStringA ret;
 485                 int i_size = s.GetLength();
 486                 bool isEscaped = false;
 487                 for (int i = 1; i < i_size; ++i)
 488                 {
 489                         char c = s[i];
 490                         if (isEscaped)
 491                         {
 492                                 if (c >= '0' && c <= '3')
 493                                 {
 494                                         if (i + 2 < i_size)
 495                                         {
 496                                                 c = (((c - '0') & 03) << 6) | (((s[i + 1] - '0') & 07) << 3) | ((s[i + 2] - '0') & 07);
 497                                                 i += 2;
 498                                                 ret += c;
 499                                         }
 500                                 }
 501                                 else
 502                                 {
 503                                         switch (c)
 504                                         {
 505                                         case 'a' : c = '\a'; break;
 506                                         case 'b' : c = '\b'; break;
 507                                         case 't' : c = '\t'; break;
 508                                         case 'n' : c = '\n'; break;
 509                                         case 'v' : c = '\v'; break;
 510                                         case 'f' : c = '\f'; break;
 511                                         case 'r' : c = '\r'; break;
 512                                         }
 513                                         ret += c;
 514                                 }
 515                                 isEscaped = false;
 516                         }
 517                         else
 518                         {
 519                                 if (c == '\\')
 520                                 {
 521                                         isEscaped = true;
 522                                 }
 523                                 else if(c == '"')
 524                                 {
 525                                         break;
 526                                 }
 527                                 else
 528                                 {
 529                                         ret += c;
 530                                 }
 531                         }
 532                 }
 533                 return CUnicodeUtils::GetUnicode(ret);
 534         }
 535         else
 536                 return CUnicodeUtils::GetUnicode(s);
 537 }