optimize TGitCache for CLI operations
[TortoiseGit.git] / src / TortoiseMerge / FileTextLines.cpp
blob7dd834a3e7a459dabceda0fc52234f0473e3bf5a
1 // TortoiseMerge - a Diff/Patch program
3 // Copyright (C) 2007-2011 - TortoiseSVN
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License
7 // as published by the Free Software Foundation; either version 2
8 // of the License, or (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software Foundation,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 #include "StdAfx.h"
20 #include "Resource.h"
21 #include "UnicodeUtils.h"
22 #include "registry.h"
23 #include ".\filetextlines.h"
24 #include "FormatMessageWrapper.h"
25 #include "SmartHandle.h"
27 CFileTextLines::CFileTextLines(void)
28 : m_UnicodeType(CFileTextLines::AUTOTYPE)
29 , m_LineEndings(EOL_AUTOLINE)
30 , m_bReturnAtEnd(false)
34 CFileTextLines::~CFileTextLines(void)
38 CFileTextLines::UnicodeType CFileTextLines::CheckUnicodeType(LPVOID pBuffer, int cb)
40 if (cb < 2)
41 return CFileTextLines::ASCII;
42 UINT16 * pVal16 = (UINT16 *)pBuffer;
43 UINT8 * pVal8 = (UINT8 *)(pVal16+1);
44 // scan the whole buffer for a 0x0000 sequence
45 // if found, we assume a binary file
46 for (int i=0; i<(cb-2); i=i+2)
48 if (0x0000 == *pVal16++)
49 return CFileTextLines::BINARY;
51 pVal16 = (UINT16 *)pBuffer;
52 if (*pVal16 == 0xFEFF)
53 return CFileTextLines::UNICODE_LE;
54 if (cb < 3)
55 return ASCII;
56 if (*pVal16 == 0xBBEF)
58 if (*pVal8 == 0xBF)
59 return CFileTextLines::UTF8BOM;
61 // check for illegal UTF8 chars
62 pVal8 = (UINT8 *)pBuffer;
63 for (int i=0; i<cb; ++i)
65 if ((*pVal8 == 0xC0)||(*pVal8 == 0xC1)||(*pVal8 >= 0xF5))
66 return CFileTextLines::ASCII;
67 pVal8++;
69 pVal8 = (UINT8 *)pBuffer;
70 bool bUTF8 = false;
71 bool bNonANSI = false;
72 for (int i=0; i<(cb-3); ++i)
74 if (*pVal8 > 127)
75 bNonANSI = true;
76 if ((*pVal8 & 0xE0)==0xC0)
78 pVal8++;i++;
79 if ((*pVal8 & 0xC0)!=0x80)
80 return CFileTextLines::ASCII;
81 bUTF8 = true;
83 if ((*pVal8 & 0xF0)==0xE0)
85 pVal8++;i++;
86 if ((*pVal8 & 0xC0)!=0x80)
87 return CFileTextLines::ASCII;
88 pVal8++;i++;
89 if ((*pVal8 & 0xC0)!=0x80)
90 return CFileTextLines::ASCII;
91 bUTF8 = true;
93 if ((*pVal8 & 0xF8)==0xF0)
95 pVal8++;i++;
96 if ((*pVal8 & 0xC0)!=0x80)
97 return CFileTextLines::ASCII;
98 pVal8++;i++;
99 if ((*pVal8 & 0xC0)!=0x80)
100 return CFileTextLines::ASCII;
101 pVal8++;i++;
102 if ((*pVal8 & 0xC0)!=0x80)
103 return CFileTextLines::ASCII;
104 bUTF8 = true;
106 pVal8++;
108 if (bUTF8)
109 return CFileTextLines::UTF8;
110 if ((!bNonANSI)&&(DWORD(CRegDWORD(_T("Software\\TortoiseMerge\\UseUTF8"), FALSE))))
111 return CFileTextLines::UTF8;
112 return CFileTextLines::ASCII;
116 EOL CFileTextLines::CheckLineEndings(LPVOID pBuffer, int cb)
118 EOL retval = EOL_AUTOLINE;
119 char * buf = (char *)pBuffer;
120 for (int i=0; i<cb; i++)
122 //now search the buffer for line endings
123 if (buf[i] == 0x0a)
125 if ((i+1)<cb)
127 if (buf[i+1] == 0)
129 //UNICODE
130 if ((i+2)<cb)
132 if (buf[i+2] == 0x0d)
134 retval = EOL_LFCR;
135 break;
137 else
139 retval = EOL_LF;
140 break;
144 else if (buf[i+1] == 0x0d)
146 retval = EOL_LFCR;
147 break;
150 retval = EOL_LF;
151 break;
153 else if (buf[i] == 0x0d)
155 if ((i+1)<cb)
157 if (buf[i+1] == 0)
159 //UNICODE
160 if ((i+2)<cb)
162 if (buf[i+2] == 0x0a)
164 retval = EOL_CRLF;
165 break;
167 else
169 retval = EOL_CR;
170 break;
174 else if (buf[i+1] == 0x0a)
176 retval = EOL_CRLF;
177 break;
180 retval = EOL_CR;
181 break;
184 return retval;
187 BOOL CFileTextLines::Load(const CString& sFilePath, int lengthHint /* = 0*/)
189 m_LineEndings = EOL_AUTOLINE;
190 m_UnicodeType = CFileTextLines::AUTOTYPE;
191 RemoveAll();
192 m_endings.clear();
193 if(lengthHint != 0)
195 Reserve(lengthHint);
198 if (PathIsDirectory(sFilePath))
200 m_sErrorString.Format(IDS_ERR_FILE_NOTAFILE, (LPCTSTR)sFilePath);
201 return FALSE;
204 if (!PathFileExists(sFilePath))
206 //file does not exist, so just return SUCCESS
207 return TRUE;
210 CAutoFile hFile = CreateFile(sFilePath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, NULL, NULL);
211 if (!hFile)
213 SetErrorString();
214 return FALSE;
217 LARGE_INTEGER fsize;
218 if (!GetFileSizeEx(hFile, &fsize))
220 SetErrorString();
221 return false;
223 if (fsize.HighPart)
225 // file is way too big for us
226 m_sErrorString.LoadString(IDS_ERR_FILE_TOOBIG);
227 return FALSE;
230 // If new[] was done for type T delete[] must be called on a pointer of type T*,
231 // otherwise the behavior is undefined.
232 // +1 is to address possible truncation when integer division is done
233 wchar_t* pFileBuf = new wchar_t[fsize.LowPart/sizeof(wchar_t) + 1];
234 DWORD dwReadBytes = 0;
235 if (!ReadFile(hFile, pFileBuf, fsize.LowPart, &dwReadBytes, NULL))
237 delete [] pFileBuf;
238 SetErrorString();
239 return FALSE;
241 if (m_UnicodeType == CFileTextLines::AUTOTYPE)
243 m_UnicodeType = this->CheckUnicodeType(pFileBuf, dwReadBytes);
245 if (m_LineEndings == EOL_AUTOLINE)
247 m_LineEndings = CheckLineEndings(pFileBuf, min(10000, dwReadBytes));
249 hFile.CloseHandle();
251 if (m_UnicodeType == CFileTextLines::BINARY)
253 m_sErrorString.Format(IDS_ERR_FILE_BINARY, (LPCTSTR)sFilePath);
254 delete [] pFileBuf;
255 return FALSE;
258 // we may have to convert the file content
259 if ((m_UnicodeType == UTF8)||(m_UnicodeType == UTF8BOM))
261 int ret = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)pFileBuf, dwReadBytes, NULL, 0);
262 wchar_t * pWideBuf = new wchar_t[ret];
263 int ret2 = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)pFileBuf, dwReadBytes, pWideBuf, ret);
264 if (ret2 == ret)
266 delete [] pFileBuf;
267 pFileBuf = pWideBuf;
268 dwReadBytes = ret2;
269 } else
270 delete [] pWideBuf;
272 else if (m_UnicodeType == ASCII)
274 int ret = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, (LPCSTR)pFileBuf, dwReadBytes, NULL, 0);
275 wchar_t * pWideBuf = new wchar_t[ret];
276 int ret2 = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, (LPCSTR)pFileBuf, dwReadBytes, pWideBuf, ret);
277 if (ret2 == ret)
279 delete [] pFileBuf;
280 pFileBuf = pWideBuf;
281 dwReadBytes = ret2;
283 else
284 delete [] pWideBuf;
286 // fill in the lines into the array
287 wchar_t * pTextBuf = pFileBuf;
288 wchar_t * pLineStart = pFileBuf;
289 if (m_UnicodeType == UNICODE_LE)
291 // UTF16 have two bytes per char
292 dwReadBytes/=2;
294 if ((m_UnicodeType == UTF8BOM)||(m_UnicodeType == UNICODE_LE))
296 // ignore the BOM
297 ++pTextBuf;
298 ++pLineStart;
299 --dwReadBytes;
302 for (DWORD i = 0; i<dwReadBytes; ++i)
304 if (*pTextBuf == '\r')
306 if ((i + 1) < dwReadBytes)
308 if (*(pTextBuf+1) == '\n')
310 // crlf line ending
311 CString line(pLineStart, (int)(pTextBuf-pLineStart));
312 Add(line, EOL_CRLF);
313 pLineStart = pTextBuf+2;
314 ++pTextBuf;
315 ++i;
317 else
319 // cr line ending
320 CString line(pLineStart, (int)(pTextBuf-pLineStart));
321 Add(line, EOL_CR);
322 pLineStart =pTextBuf+1;
326 else if (*pTextBuf == '\n')
328 // lf line ending
329 CString line(pLineStart, (int)(pTextBuf-pLineStart));
330 Add(line, EOL_LF);
331 pLineStart =pTextBuf+1;
333 ++pTextBuf;
335 if (pLineStart < pTextBuf)
337 CString line(pLineStart, (int)(pTextBuf-pLineStart));
338 Add(line, EOL_NOENDING);
339 m_bReturnAtEnd = false;
341 else
342 m_bReturnAtEnd = true;
344 delete [] pFileBuf;
346 return TRUE;
349 void CFileTextLines::StripWhiteSpace(CString& sLine,DWORD dwIgnoreWhitespaces, bool blame)
351 if (blame)
353 if (sLine.GetLength() > 66)
354 sLine = sLine.Mid(66);
356 switch (dwIgnoreWhitespaces)
358 case 0:
359 // Compare whitespaces
360 // do nothing
361 break;
362 case 1:
363 // Ignore all whitespaces
364 sLine.TrimLeft(_T(" \t"));
365 sLine.TrimRight(_T(" \t"));
366 break;
367 case 2:
368 // Ignore leading whitespace
369 sLine.TrimLeft(_T(" \t"));
370 break;
371 case 3:
372 // Ignore ending whitespace
373 sLine.TrimRight(_T(" \t"));
374 break;
378 void CFileTextLines::StripAsciiWhiteSpace(CStringA& sLine,DWORD dwIgnoreWhitespaces, bool blame)
380 if (blame)
382 if (sLine.GetLength() > 66)
383 sLine = sLine.Mid(66);
385 switch (dwIgnoreWhitespaces)
387 case 0: // Compare whitespaces
388 // do nothing
389 break;
390 case 1:
391 // Ignore all whitespaces
392 StripAsciiWhiteSpace(sLine);
393 break;
394 case 2:
395 // Ignore leading whitespace
396 sLine.TrimLeft(" \t");
397 break;
398 case 3:
399 // Ignore leading whitespace
400 sLine.TrimRight(" \t");
401 break;
406 // Fast in-place removal of spaces and tabs from CStringA line
408 void CFileTextLines::StripAsciiWhiteSpace(CStringA& sLine)
410 int outputLen = 0;
411 char* pWriteChr = sLine.GetBuffer(sLine.GetLength());
412 const char* pReadChr = pWriteChr;
413 while(*pReadChr)
415 if(*pReadChr != ' ' && *pReadChr != '\t')
417 *pWriteChr++ = *pReadChr;
418 outputLen++;
420 ++pReadChr;
422 *pWriteChr = '\0';
423 sLine.ReleaseBuffer(outputLen);
426 BOOL CFileTextLines::Save(const CString& sFilePath, bool bSaveAsUTF8, DWORD dwIgnoreWhitespaces /*=0*/, BOOL bIgnoreCase /*= FALSE*/, bool bBlame /*= false*/)
430 CString destPath = sFilePath;
431 // now make sure that the destination directory exists
432 int ind = 0;
433 while (destPath.Find('\\', ind)>=2)
435 if (!PathIsDirectory(destPath.Left(destPath.Find('\\', ind))))
437 if (!CreateDirectory(destPath.Left(destPath.Find('\\', ind)), NULL))
438 return FALSE;
440 ind = destPath.Find('\\', ind)+1;
443 CStdioFile file; // Hugely faster than CFile for big file writes - because it uses buffering
444 if (!file.Open(sFilePath, CFile::modeCreate | CFile::modeWrite | CFile::typeBinary))
446 m_sErrorString.Format(IDS_ERR_FILE_OPEN, (LPCTSTR)sFilePath);
447 return FALSE;
449 if ((!bSaveAsUTF8)&&(m_UnicodeType == CFileTextLines::UNICODE_LE))
451 //first write the BOM
452 UINT16 wBOM = 0xFEFF;
453 file.Write(&wBOM, 2);
454 for (int i=0; i<GetCount(); i++)
456 CString sLine = GetAt(i);
457 EOL ending = GetLineEnding(i);
458 StripWhiteSpace(sLine,dwIgnoreWhitespaces, bBlame);
459 if (bIgnoreCase)
460 sLine = sLine.MakeLower();
461 file.Write((LPCTSTR)sLine, sLine.GetLength()*sizeof(TCHAR));
462 if (ending == EOL_AUTOLINE)
463 ending = m_LineEndings;
464 switch (ending)
466 case EOL_CR:
467 sLine = _T("\x0d");
468 break;
469 case EOL_CRLF:
470 case EOL_AUTOLINE:
471 sLine = _T("\x0d\x0a");
472 break;
473 case EOL_LF:
474 sLine = _T("\x0a");
475 break;
476 case EOL_LFCR:
477 sLine = _T("\x0a\x0d");
478 break;
479 default:
480 sLine.Empty();
481 break;
483 if ((m_bReturnAtEnd)||(i != GetCount()-1))
484 file.Write((LPCTSTR)sLine, sLine.GetLength()*sizeof(TCHAR));
487 else if ((!bSaveAsUTF8)&&((m_UnicodeType == CFileTextLines::ASCII)||(m_UnicodeType == CFileTextLines::AUTOTYPE)))
489 for (int i=0; i< GetCount(); i++)
491 // Copy CString to 8 bit without conversion
492 CString sLineT = GetAt(i);
493 CStringA sLine = CStringA(sLineT);
494 EOL ending = GetLineEnding(i);
496 StripAsciiWhiteSpace(sLine,dwIgnoreWhitespaces, bBlame);
497 if (bIgnoreCase)
498 sLine = sLine.MakeLower();
499 if ((m_bReturnAtEnd)||(i != GetCount()-1))
501 if (ending == EOL_AUTOLINE)
502 ending = m_LineEndings;
503 switch (ending)
505 case EOL_CR:
506 sLine += '\x0d';
507 break;
508 case EOL_CRLF:
509 case EOL_AUTOLINE:
510 sLine.Append("\x0d\x0a", 2);
511 break;
512 case EOL_LF:
513 sLine += '\x0a';
514 break;
515 case EOL_LFCR:
516 sLine.Append("\x0a\x0d", 2);
517 break;
520 file.Write((LPCSTR)sLine, sLine.GetLength());
523 else if ((bSaveAsUTF8)||((m_UnicodeType == CFileTextLines::UTF8BOM)||(m_UnicodeType == CFileTextLines::UTF8)))
525 if (m_UnicodeType == CFileTextLines::UTF8BOM)
527 //first write the BOM
528 UINT16 wBOM = 0xBBEF;
529 file.Write(&wBOM, 2);
530 UINT8 uBOM = 0xBF;
531 file.Write(&uBOM, 1);
533 for (int i=0; i<GetCount(); i++)
535 CStringA sLine = CUnicodeUtils::GetUTF8(GetAt(i));
536 EOL ending = GetLineEnding(i);
537 StripAsciiWhiteSpace(sLine,dwIgnoreWhitespaces, bBlame);
538 if (bIgnoreCase)
539 sLine = sLine.MakeLower();
541 if ((m_bReturnAtEnd)||(i != GetCount()-1))
543 if (ending == EOL_AUTOLINE)
544 ending = m_LineEndings;
545 switch (ending)
547 case EOL_CR:
548 sLine += '\x0d';
549 break;
550 case EOL_CRLF:
551 case EOL_AUTOLINE:
552 sLine.Append("\x0d\x0a",2);
553 break;
554 case EOL_LF:
555 sLine += '\x0a';
556 break;
557 case EOL_LFCR:
558 sLine.Append("\x0a\x0d",2);
559 break;
562 file.Write((LPCSTR)sLine, sLine.GetLength());
565 file.Close();
567 catch (CException * e)
569 e->GetErrorMessage(m_sErrorString.GetBuffer(4096), 4096);
570 m_sErrorString.ReleaseBuffer();
571 e->Delete();
572 return FALSE;
574 return TRUE;
577 void CFileTextLines::SetErrorString()
579 m_sErrorString = CFormatMessageWrapper();
582 void CFileTextLines::CopySettings(CFileTextLines * pFileToCopySettingsTo)
584 if (pFileToCopySettingsTo)
586 pFileToCopySettingsTo->m_UnicodeType = m_UnicodeType;
587 pFileToCopySettingsTo->m_LineEndings = m_LineEndings;
588 pFileToCopySettingsTo->m_bReturnAtEnd = m_bReturnAtEnd;