Fixed issue #1507: Submodule Diff Dialog should show dirty state only on working...
[TortoiseGit.git] / src / TortoiseMerge / FileTextLines.cpp
blobcc3d50281990a3638fec51bbfd5f2fc2b577dd7a
1 // TortoiseMerge - a Diff/Patch program
3 // Copyright (C) 2007-2011 - TortoiseSVN
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License
7 // as published by the Free Software Foundation; either version 2
8 // of the License, or (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software Foundation,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 #include "StdAfx.h"
20 #include "Resource.h"
21 #include "UnicodeUtils.h"
22 #include "registry.h"
23 #include ".\filetextlines.h"
24 #include "FormatMessageWrapper.h"
25 #include "SmartHandle.h"
26 #include "auto_buffer.h"
// Swaps the two bytes of a UTF-16 code unit (big-endian <-> little-endian).
// Only the low 16 bits of nValue take part in the swap, so the result is a
// clean 16-bit value even when wchar_t is wider than 16 bits; for a 16-bit
// wchar_t the result is the same as a plain shift-and-or.
wchar_t WideCharSwap(wchar_t nValue)
{
	const unsigned int lowByte  = static_cast<unsigned int>(nValue) & 0xFFu;
	const unsigned int highByte = (static_cast<unsigned int>(nValue) >> 8) & 0xFFu;
	return static_cast<wchar_t>((lowByte << 8) | highByte);
}
33 CFileTextLines::CFileTextLines(void)
34 : m_UnicodeType(CFileTextLines::AUTOTYPE)
35 , m_LineEndings(EOL_AUTOLINE)
36 , m_bReturnAtEnd(false)
40 CFileTextLines::~CFileTextLines(void)
44 CFileTextLines::UnicodeType CFileTextLines::CheckUnicodeType(LPVOID pBuffer, int cb)
46 if (cb < 2)
47 return CFileTextLines::ASCII;
48 UINT16 * pVal16 = (UINT16 *)pBuffer;
49 UINT8 * pVal8 = (UINT8 *)(pVal16+1);
50 // scan the whole buffer for a 0x0000 sequence
51 // if found, we assume a binary file
52 for (int i=0; i<(cb-2); i=i+2)
54 if (0x0000 == *pVal16++)
55 return CFileTextLines::BINARY;
57 pVal16 = (UINT16 *)pBuffer;
58 if (*pVal16 == 0xFEFF)
59 return CFileTextLines::UNICODE_LE;
60 if (*pVal16 == 0xFFFE)
61 return CFileTextLines::UNICODE_BE;
62 if (cb < 3)
63 return ASCII;
64 if (*pVal16 == 0xBBEF)
66 if (*pVal8 == 0xBF)
67 return CFileTextLines::UTF8BOM;
69 // check for illegal UTF8 chars
70 pVal8 = (UINT8 *)pBuffer;
71 for (int i=0; i<cb; ++i)
73 if ((*pVal8 == 0xC0)||(*pVal8 == 0xC1)||(*pVal8 >= 0xF5))
74 return CFileTextLines::ASCII;
75 pVal8++;
77 pVal8 = (UINT8 *)pBuffer;
78 bool bUTF8 = false;
79 bool bNonANSI = false;
80 for (int i=0; i<(cb-3); ++i)
82 if (*pVal8 > 127)
83 bNonANSI = true;
84 if ((*pVal8 & 0xE0)==0xC0)
86 pVal8++;i++;
87 if ((*pVal8 & 0xC0)!=0x80)
88 return CFileTextLines::ASCII;
89 bUTF8 = true;
91 if ((*pVal8 & 0xF0)==0xE0)
93 pVal8++;i++;
94 if ((*pVal8 & 0xC0)!=0x80)
95 return CFileTextLines::ASCII;
96 pVal8++;i++;
97 if ((*pVal8 & 0xC0)!=0x80)
98 return CFileTextLines::ASCII;
99 bUTF8 = true;
101 if ((*pVal8 & 0xF8)==0xF0)
103 pVal8++;i++;
104 if ((*pVal8 & 0xC0)!=0x80)
105 return CFileTextLines::ASCII;
106 pVal8++;i++;
107 if ((*pVal8 & 0xC0)!=0x80)
108 return CFileTextLines::ASCII;
109 pVal8++;i++;
110 if ((*pVal8 & 0xC0)!=0x80)
111 return CFileTextLines::ASCII;
112 bUTF8 = true;
114 pVal8++;
116 if (bUTF8)
117 return CFileTextLines::UTF8;
118 if ((!bNonANSI)&&(DWORD(CRegDWORD(_T("Software\\TortoiseMerge\\UseUTF8"), FALSE))))
119 return CFileTextLines::UTF8;
120 return CFileTextLines::ASCII;
124 EOL CFileTextLines::CheckLineEndings(LPVOID pBuffer, int cb)
126 EOL retval = EOL_AUTOLINE;
127 char * buf = (char *)pBuffer;
128 for (int i=0; i<cb; i++)
130 //now search the buffer for line endings
131 if (buf[i] == 0x0a)
133 if ((i+1)<cb)
135 if (buf[i+1] == 0)
137 //UNICODE
138 if ((i+2)<cb)
140 if (buf[i+2] == 0x0d)
142 retval = EOL_LFCR;
143 break;
145 else
147 retval = EOL_LF;
148 break;
152 else if (buf[i+1] == 0x0d)
154 retval = EOL_LFCR;
155 break;
158 retval = EOL_LF;
159 break;
161 else if (buf[i] == 0x0d)
163 if ((i+1)<cb)
165 if (buf[i+1] == 0)
167 //UNICODE
168 if ((i+2)<cb)
170 if (buf[i+2] == 0x0a)
172 retval = EOL_CRLF;
173 break;
175 else
177 retval = EOL_CR;
178 break;
182 else if (buf[i+1] == 0x0a)
184 retval = EOL_CRLF;
185 break;
188 retval = EOL_CR;
189 break;
192 return retval;
195 BOOL CFileTextLines::Load(const CString& sFilePath, int lengthHint /* = 0*/)
197 WCHAR exceptionError[1000] = {0};
198 m_LineEndings = EOL_AUTOLINE;
199 m_UnicodeType = CFileTextLines::AUTOTYPE;
200 RemoveAll();
201 m_endings.clear();
202 if(lengthHint != 0)
204 Reserve(lengthHint);
207 if (PathIsDirectory(sFilePath))
209 m_sErrorString.Format(IDS_ERR_FILE_NOTAFILE, (LPCTSTR)sFilePath);
210 return FALSE;
213 if (!PathFileExists(sFilePath))
215 //file does not exist, so just return SUCCESS
216 return TRUE;
219 CAutoFile hFile = CreateFile(sFilePath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, NULL, NULL);
220 if (!hFile)
222 SetErrorString();
223 return FALSE;
226 LARGE_INTEGER fsize;
227 if (!GetFileSizeEx(hFile, &fsize))
229 SetErrorString();
230 return false;
232 if (fsize.HighPart)
234 // file is way too big for us
235 m_sErrorString.LoadString(IDS_ERR_FILE_TOOBIG);
236 return FALSE;
239 // If new[] was done for type T delete[] must be called on a pointer of type T*,
240 // otherwise the behavior is undefined.
241 // +1 is to address possible truncation when integer division is done
242 wchar_t* pFileBuf = NULL;
245 pFileBuf = new wchar_t[fsize.LowPart/sizeof(wchar_t) + 1];
247 catch (CMemoryException* e)
249 e->GetErrorMessage(exceptionError, _countof(exceptionError));
250 m_sErrorString = exceptionError;
251 return FALSE;
253 DWORD dwReadBytes = 0;
254 if (!ReadFile(hFile, pFileBuf, fsize.LowPart, &dwReadBytes, NULL))
256 delete [] pFileBuf;
257 SetErrorString();
258 return FALSE;
260 if (m_UnicodeType == CFileTextLines::AUTOTYPE)
262 m_UnicodeType = this->CheckUnicodeType(pFileBuf, dwReadBytes);
264 if (m_LineEndings == EOL_AUTOLINE)
266 m_LineEndings = CheckLineEndings(pFileBuf, min(10000, dwReadBytes));
268 hFile.CloseHandle();
270 if (m_UnicodeType == CFileTextLines::BINARY)
272 m_sErrorString.Format(IDS_ERR_FILE_BINARY, (LPCTSTR)sFilePath);
273 delete [] pFileBuf;
274 return FALSE;
277 // we may have to convert the file content
278 if ((m_UnicodeType == UTF8)||(m_UnicodeType == UTF8BOM))
280 int ret = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)pFileBuf, dwReadBytes, NULL, 0);
281 wchar_t * pWideBuf = NULL;
284 pWideBuf = new wchar_t[ret];
286 catch (CMemoryException* e)
288 e->GetErrorMessage(exceptionError, _countof(exceptionError));
289 m_sErrorString = exceptionError;
290 delete [] pFileBuf;
291 return FALSE;
293 int ret2 = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)pFileBuf, dwReadBytes, pWideBuf, ret);
294 if (ret2 == ret)
296 delete [] pFileBuf;
297 pFileBuf = pWideBuf;
298 dwReadBytes = ret2;
299 } else
300 delete [] pWideBuf;
302 else if (m_UnicodeType == ASCII)
304 int ret = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, (LPCSTR)pFileBuf, dwReadBytes, NULL, 0);
305 wchar_t * pWideBuf = NULL;
308 pWideBuf = new wchar_t[ret];
310 catch (CMemoryException* e)
312 e->GetErrorMessage(exceptionError, _countof(exceptionError));
313 m_sErrorString = exceptionError;
314 delete [] pFileBuf;
315 return FALSE;
317 int ret2 = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, (LPCSTR)pFileBuf, dwReadBytes, pWideBuf, ret);
318 if (ret2 == ret)
320 delete [] pFileBuf;
321 pFileBuf = pWideBuf;
322 dwReadBytes = ret2;
324 else
325 delete [] pWideBuf;
327 // fill in the lines into the array
328 wchar_t * pTextBuf = pFileBuf;
329 wchar_t * pLineStart = pFileBuf;
330 if ((m_UnicodeType == UNICODE_LE)||(m_UnicodeType == UNICODE_BE))
332 // UTF16 have two bytes per char
333 dwReadBytes/=2;
335 if ((m_UnicodeType == UTF8BOM)||(m_UnicodeType == UNICODE_LE)||(m_UnicodeType == UNICODE_BE))
337 // ignore the BOM
338 ++pTextBuf;
339 ++pLineStart;
340 --dwReadBytes;
343 if (m_UnicodeType == UNICODE_BE)
345 // swap the bytes to little-endian order to get proper strings in wchar_t format
346 wchar_t * pSwapBuf = pTextBuf;
347 for (DWORD i = 0; i<dwReadBytes; ++i)
349 *pSwapBuf = WideCharSwap(*pSwapBuf);
350 ++pSwapBuf;
354 for (DWORD i = 0; i<dwReadBytes; ++i)
356 if (*pTextBuf == '\r')
358 if ((i + 1) < dwReadBytes)
360 if (*(pTextBuf+1) == '\n')
362 // crlf line ending
363 CString line(pLineStart, (int)(pTextBuf-pLineStart));
364 Add(line, EOL_CRLF);
365 pLineStart = pTextBuf+2;
366 ++pTextBuf;
367 ++i;
369 else
371 // cr line ending
372 CString line(pLineStart, (int)(pTextBuf-pLineStart));
373 Add(line, EOL_CR);
374 pLineStart =pTextBuf+1;
378 else if (*pTextBuf == '\n')
380 // lf line ending
381 CString line(pLineStart, (int)(pTextBuf-pLineStart));
382 Add(line, EOL_LF);
383 pLineStart =pTextBuf+1;
385 ++pTextBuf;
387 if (pLineStart < pTextBuf)
389 CString line(pLineStart, (int)(pTextBuf-pLineStart));
390 Add(line, EOL_NOENDING);
391 m_bReturnAtEnd = false;
393 else
394 m_bReturnAtEnd = true;
396 delete [] pFileBuf;
398 return TRUE;
401 void CFileTextLines::StripWhiteSpace(CString& sLine,DWORD dwIgnoreWhitespaces, bool blame)
403 if (blame)
405 if (sLine.GetLength() > 66)
406 sLine = sLine.Mid(66);
408 switch (dwIgnoreWhitespaces)
410 case 0:
411 // Compare whitespaces
412 // do nothing
413 break;
414 case 1:
415 // Ignore all whitespaces
416 sLine.TrimLeft(_T(" \t"));
417 sLine.TrimRight(_T(" \t"));
418 break;
419 case 2:
420 // Ignore leading whitespace
421 sLine.TrimLeft(_T(" \t"));
422 break;
423 case 3:
424 // Ignore ending whitespace
425 sLine.TrimRight(_T(" \t"));
426 break;
430 void CFileTextLines::StripAsciiWhiteSpace(CStringA& sLine,DWORD dwIgnoreWhitespaces, bool blame)
432 if (blame)
434 if (sLine.GetLength() > 66)
435 sLine = sLine.Mid(66);
437 switch (dwIgnoreWhitespaces)
439 case 0: // Compare whitespaces
440 // do nothing
441 break;
442 case 1:
443 // Ignore all whitespaces
444 StripAsciiWhiteSpace(sLine);
445 break;
446 case 2:
447 // Ignore leading whitespace
448 sLine.TrimLeft(" \t");
449 break;
450 case 3:
451 // Ignore leading whitespace
452 sLine.TrimRight(" \t");
453 break;
458 // Fast in-place removal of spaces and tabs from CStringA line
460 void CFileTextLines::StripAsciiWhiteSpace(CStringA& sLine)
462 int outputLen = 0;
463 char* pWriteChr = sLine.GetBuffer(sLine.GetLength());
464 const char* pReadChr = pWriteChr;
465 while(*pReadChr)
467 if(*pReadChr != ' ' && *pReadChr != '\t')
469 *pWriteChr++ = *pReadChr;
470 outputLen++;
472 ++pReadChr;
474 *pWriteChr = '\0';
475 sLine.ReleaseBuffer(outputLen);
478 BOOL CFileTextLines::Save(const CString& sFilePath, bool bSaveAsUTF8, DWORD dwIgnoreWhitespaces /*=0*/, BOOL bIgnoreCase /*= FALSE*/, bool bBlame /*= false*/)
482 CString destPath = sFilePath;
483 // now make sure that the destination directory exists
484 int ind = 0;
485 while (destPath.Find('\\', ind)>=2)
487 if (!PathIsDirectory(destPath.Left(destPath.Find('\\', ind))))
489 if (!CreateDirectory(destPath.Left(destPath.Find('\\', ind)), NULL))
490 return FALSE;
492 ind = destPath.Find('\\', ind)+1;
495 CStdioFile file; // Hugely faster than CFile for big file writes - because it uses buffering
496 if (!file.Open(sFilePath, CFile::modeCreate | CFile::modeWrite | CFile::typeBinary))
498 m_sErrorString.Format(IDS_ERR_FILE_OPEN, (LPCTSTR)sFilePath);
499 return FALSE;
501 if ((!bSaveAsUTF8)&&(m_UnicodeType == CFileTextLines::UNICODE_LE))
503 //first write the BOM
504 UINT16 wBOM = 0xFEFF;
505 file.Write(&wBOM, 2);
506 for (int i=0; i<GetCount(); i++)
508 CString sLine = GetAt(i);
509 EOL ending = GetLineEnding(i);
510 StripWhiteSpace(sLine,dwIgnoreWhitespaces, bBlame);
511 if (bIgnoreCase)
512 sLine = sLine.MakeLower();
513 file.Write((LPCTSTR)sLine, sLine.GetLength()*sizeof(TCHAR));
514 if (ending == EOL_AUTOLINE)
515 ending = m_LineEndings;
516 switch (ending)
518 case EOL_CR:
519 sLine = _T("\x0d");
520 break;
521 case EOL_CRLF:
522 case EOL_AUTOLINE:
523 sLine = _T("\x0d\x0a");
524 break;
525 case EOL_LF:
526 sLine = _T("\x0a");
527 break;
528 case EOL_LFCR:
529 sLine = _T("\x0a\x0d");
530 break;
531 default:
532 sLine.Empty();
533 break;
535 if ((m_bReturnAtEnd)||(i != GetCount()-1))
536 file.Write((LPCTSTR)sLine, sLine.GetLength()*sizeof(TCHAR));
539 if ((!bSaveAsUTF8)&&(m_UnicodeType == CFileTextLines::UNICODE_BE))
541 int linebuflen = 4096;
542 auto_buffer<BYTE> beBuf(linebuflen);
543 //first write the BOM
544 UINT16 wBOM = 0xFFFE;
545 file.Write(&wBOM, 2);
546 for (int i=0; i<GetCount(); i++)
548 CString sLine = GetAt(i);
549 EOL ending = GetLineEnding(i);
550 StripWhiteSpace(sLine,dwIgnoreWhitespaces, bBlame);
551 if (bIgnoreCase)
552 sLine = sLine.MakeLower();
553 int bytelen = sLine.GetLength()*sizeof(WCHAR);
554 if (bytelen > linebuflen)
556 // increase buffer size if necessary
557 linebuflen = bytelen + 1024;
558 beBuf.reset(linebuflen);
560 for (int spos = 0; spos < bytelen; )
562 // swap the bytes to big-endian order
563 wchar_t c = sLine[spos/2];
564 beBuf[spos++] = c>>8;
565 beBuf[spos++] = c & 0xFF;
567 file.Write(beBuf.get(), sLine.GetLength()*sizeof(WCHAR));
568 if (ending == EOL_AUTOLINE)
569 ending = m_LineEndings;
570 switch (ending)
572 case EOL_CR:
573 sLine = _T("\x0d");
574 break;
575 case EOL_CRLF:
576 case EOL_AUTOLINE:
577 sLine = _T("\x0d\x0a");
578 break;
579 case EOL_LF:
580 sLine = _T("\x0a");
581 break;
582 case EOL_LFCR:
583 sLine = _T("\x0a\x0d");
584 break;
585 default:
586 sLine.Empty();
587 break;
589 if ((m_bReturnAtEnd)||(i != GetCount()-1))
591 // swap the bytes to big-endian order
592 BYTE buf[5];
593 int p = 0;
594 if (sLine.GetLength() > 0)
596 wchar_t c = sLine[0];
597 buf[p++] = c>>8;
598 buf[p++] = c & 0xFF;
600 if (sLine.GetLength() > 1)
602 wchar_t c = sLine[1];
603 buf[p++] = c>>8;
604 buf[p++] = c & 0xFF;
607 file.Write(buf, sLine.GetLength()*sizeof(TCHAR));
611 else if ((!bSaveAsUTF8)&&((m_UnicodeType == CFileTextLines::ASCII)||(m_UnicodeType == CFileTextLines::AUTOTYPE)))
613 for (int i=0; i< GetCount(); i++)
615 // Copy CString to 8 bit without conversion
616 CString sLineT = GetAt(i);
617 CStringA sLine = CStringA(sLineT);
618 EOL ending = GetLineEnding(i);
620 StripAsciiWhiteSpace(sLine,dwIgnoreWhitespaces, bBlame);
621 if (bIgnoreCase)
622 sLine = sLine.MakeLower();
623 if ((m_bReturnAtEnd)||(i != GetCount()-1))
625 if (ending == EOL_AUTOLINE)
626 ending = m_LineEndings;
627 switch (ending)
629 case EOL_CR:
630 sLine += '\x0d';
631 break;
632 case EOL_CRLF:
633 case EOL_AUTOLINE:
634 sLine.Append("\x0d\x0a", 2);
635 break;
636 case EOL_LF:
637 sLine += '\x0a';
638 break;
639 case EOL_LFCR:
640 sLine.Append("\x0a\x0d", 2);
641 break;
644 file.Write((LPCSTR)sLine, sLine.GetLength());
647 else if ((bSaveAsUTF8)||((m_UnicodeType == CFileTextLines::UTF8BOM)||(m_UnicodeType == CFileTextLines::UTF8)))
649 if (m_UnicodeType == CFileTextLines::UTF8BOM)
651 //first write the BOM
652 UINT16 wBOM = 0xBBEF;
653 file.Write(&wBOM, 2);
654 UINT8 uBOM = 0xBF;
655 file.Write(&uBOM, 1);
657 for (int i=0; i<GetCount(); i++)
659 CStringA sLine = CUnicodeUtils::GetUTF8(GetAt(i));
660 EOL ending = GetLineEnding(i);
661 StripAsciiWhiteSpace(sLine,dwIgnoreWhitespaces, bBlame);
662 if (bIgnoreCase)
663 sLine = sLine.MakeLower();
665 if ((m_bReturnAtEnd)||(i != GetCount()-1))
667 if (ending == EOL_AUTOLINE)
668 ending = m_LineEndings;
669 switch (ending)
671 case EOL_CR:
672 sLine += '\x0d';
673 break;
674 case EOL_CRLF:
675 case EOL_AUTOLINE:
676 sLine.Append("\x0d\x0a",2);
677 break;
678 case EOL_LF:
679 sLine += '\x0a';
680 break;
681 case EOL_LFCR:
682 sLine.Append("\x0a\x0d",2);
683 break;
686 file.Write((LPCSTR)sLine, sLine.GetLength());
689 file.Close();
691 catch (CException * e)
693 e->GetErrorMessage(m_sErrorString.GetBuffer(4096), 4096);
694 m_sErrorString.ReleaseBuffer();
695 e->Delete();
696 return FALSE;
698 return TRUE;
701 void CFileTextLines::SetErrorString()
703 m_sErrorString = CFormatMessageWrapper();
706 void CFileTextLines::CopySettings(CFileTextLines * pFileToCopySettingsTo)
708 if (pFileToCopySettingsTo)
710 pFileToCopySettingsTo->m_UnicodeType = m_UnicodeType;
711 pFileToCopySettingsTo->m_LineEndings = m_LineEndings;
712 pFileToCopySettingsTo->m_bReturnAtEnd = m_bReturnAtEnd;