Update diff del rename ignore document.
[TortoiseGit.git] / src / TortoiseMerge / FileTextLines.cpp
blob41071c988192b036aeff0f38a0555cc825cb38fe
1 // TortoiseMerge - a Diff/Patch program
3 // Copyright (C) 2007-2009 - TortoiseSVN
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License
7 // as published by the Free Software Foundation; either version 2
8 // of the License, or (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software Foundation,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 #include "StdAfx.h"
20 #include "Resource.h"
21 #include "UnicodeUtils.h"
22 #include "registry.h"
23 #include ".\filetextlines.h"
26 CFileTextLines::CFileTextLines(void)
30 CFileTextLines::~CFileTextLines(void)
34 CFileTextLines::UnicodeType CFileTextLines::CheckUnicodeType(LPVOID pBuffer, int cb)
36 if (cb < 2)
37 return CFileTextLines::ASCII;
38 UINT16 * pVal = (UINT16 *)pBuffer;
39 UINT8 * pVal2 = (UINT8 *)(pVal+1);
40 // scan the whole buffer for a 0x0000 sequence
41 // if found, we assume a binary file
42 for (int i=0; i<(cb-2); i=i+2)
44 if (0x0000 == *pVal++)
45 return CFileTextLines::BINARY;
47 pVal = (UINT16 *)pBuffer;
48 if (*pVal == 0xFEFF)
49 return CFileTextLines::UNICODE_LE;
50 if (cb < 3)
51 return ASCII;
52 if (*pVal == 0xBBEF)
54 if (*pVal2 == 0xBF)
55 return CFileTextLines::UTF8BOM;
57 // check for illegal UTF8 chars
58 pVal2 = (UINT8 *)pBuffer;
59 for (int i=0; i<cb; ++i)
61 if ((*pVal2 == 0xC0)||(*pVal2 == 0xC1)||(*pVal2 >= 0xF5))
62 return CFileTextLines::ASCII;
63 pVal2++;
65 pVal2 = (UINT8 *)pBuffer;
66 bool bUTF8 = false;
67 bool bNonANSI = false;
68 for (int i=0; i<(cb-3); ++i)
70 if (*pVal2 > 127)
71 bNonANSI = true;
72 if ((*pVal2 & 0xE0)==0xC0)
74 pVal2++;i++;
75 if ((*pVal2 & 0xC0)!=0x80)
76 return CFileTextLines::ASCII;
77 bUTF8 = true;
79 if ((*pVal2 & 0xF0)==0xE0)
81 pVal2++;i++;
82 if ((*pVal2 & 0xC0)!=0x80)
83 return CFileTextLines::ASCII;
84 pVal2++;i++;
85 if ((*pVal2 & 0xC0)!=0x80)
86 return CFileTextLines::ASCII;
87 bUTF8 = true;
89 if ((*pVal2 & 0xF8)==0xF0)
91 pVal2++;i++;
92 if ((*pVal2 & 0xC0)!=0x80)
93 return CFileTextLines::ASCII;
94 pVal2++;i++;
95 if ((*pVal2 & 0xC0)!=0x80)
96 return CFileTextLines::ASCII;
97 pVal2++;i++;
98 if ((*pVal2 & 0xC0)!=0x80)
99 return CFileTextLines::ASCII;
100 bUTF8 = true;
102 pVal2++;
104 if (bUTF8)
105 return CFileTextLines::UTF8;
106 if ((!bNonANSI)&&(DWORD(CRegDWORD(_T("Software\\TortoiseMerge\\UseUTF8"), FALSE))))
107 return CFileTextLines::UTF8;
108 return CFileTextLines::ASCII;
112 EOL CFileTextLines::CheckLineEndings(LPVOID pBuffer, int cb)
114 EOL retval = EOL_AUTOLINE;
115 char * buf = (char *)pBuffer;
116 for (int i=0; i<cb; i++)
118 //now search the buffer for line endings
119 if (buf[i] == 0x0a)
121 if ((i+1)<cb)
123 if (buf[i+1] == 0)
125 //UNICODE
126 if ((i+2)<cb)
128 if (buf[i+2] == 0x0d)
130 retval = EOL_LFCR;
131 break;
133 else
135 retval = EOL_LF;
136 break;
140 else if (buf[i+1] == 0x0d)
142 retval = EOL_LFCR;
143 break;
146 retval = EOL_LF;
147 break;
149 else if (buf[i] == 0x0d)
151 if ((i+1)<cb)
153 if (buf[i+1] == 0)
155 //UNICODE
156 if ((i+2)<cb)
158 if (buf[i+2] == 0x0a)
160 retval = EOL_CRLF;
161 break;
163 else
165 retval = EOL_CR;
166 break;
170 else if (buf[i+1] == 0x0a)
172 retval = EOL_CRLF;
173 break;
176 retval = EOL_CR;
177 break;
180 return retval;
183 BOOL CFileTextLines::Load(const CString& sFilePath, int lengthHint /* = 0*/)
185 m_LineEndings = EOL_AUTOLINE;
186 m_UnicodeType = CFileTextLines::AUTOTYPE;
187 RemoveAll();
188 m_endings.clear();
189 if(lengthHint != 0)
191 Reserve(lengthHint);
194 if (PathIsDirectory(sFilePath))
196 m_sErrorString.Format(IDS_ERR_FILE_NOTAFILE, (LPCTSTR)sFilePath);
197 return FALSE;
200 if (!PathFileExists(sFilePath))
202 //file does not exist, so just return SUCCESS
203 return TRUE;
206 HANDLE hFile = CreateFile(sFilePath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, NULL, NULL);
207 if (hFile == INVALID_HANDLE_VALUE)
209 SetErrorString();
210 return FALSE;
213 LARGE_INTEGER fsize;
214 if (!GetFileSizeEx(hFile, &fsize))
216 SetErrorString();
217 CloseHandle(hFile);
218 return false;
220 if (fsize.HighPart)
222 // file is way too big for us
223 CloseHandle(hFile);
224 m_sErrorString.LoadString(IDS_ERR_FILE_TOOBIG);
225 return FALSE;
228 LPVOID pFileBuf = new BYTE[fsize.LowPart];
229 DWORD dwReadBytes = 0;
230 if (!ReadFile(hFile, pFileBuf, fsize.LowPart, &dwReadBytes, NULL))
232 SetErrorString();
233 CloseHandle(hFile);
234 return FALSE;
236 if (m_UnicodeType == CFileTextLines::AUTOTYPE)
238 m_UnicodeType = this->CheckUnicodeType(pFileBuf, dwReadBytes);
240 if (m_LineEndings == EOL_AUTOLINE)
242 m_LineEndings = CheckLineEndings(pFileBuf, min(10000, dwReadBytes));
244 CloseHandle(hFile);
246 if (m_UnicodeType == CFileTextLines::BINARY)
248 m_sErrorString.Format(IDS_ERR_FILE_BINARY, (LPCTSTR)sFilePath);
249 delete [] pFileBuf;
250 return FALSE;
253 // we may have to convert the file content
254 if ((m_UnicodeType == UTF8)||(m_UnicodeType == UTF8BOM))
256 int ret = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)pFileBuf, dwReadBytes, NULL, 0);
257 wchar_t * pWideBuf = new wchar_t[ret];
258 int ret2 = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)pFileBuf, dwReadBytes, pWideBuf, ret);
259 if (ret2 == ret)
261 delete [] pFileBuf;
262 pFileBuf = pWideBuf;
263 dwReadBytes = ret2;
266 else if (m_UnicodeType == ASCII)
268 int ret = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, (LPCSTR)pFileBuf, dwReadBytes, NULL, 0);
269 wchar_t * pWideBuf = new wchar_t[ret];
270 int ret2 = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, (LPCSTR)pFileBuf, dwReadBytes, pWideBuf, ret);
271 if (ret2 == ret)
273 delete [] pFileBuf;
274 pFileBuf = pWideBuf;
275 dwReadBytes = ret2;
278 // fill in the lines into the array
279 wchar_t * pTextBuf = (wchar_t *)pFileBuf;
280 wchar_t * pLineStart = (wchar_t *)pFileBuf;
281 if (m_UnicodeType == UNICODE_LE)
283 // UTF16 have two bytes per char
284 dwReadBytes/=2;
286 if ((m_UnicodeType == UTF8BOM)||(m_UnicodeType == UNICODE_LE))
288 // ignore the BOM
289 ++pTextBuf;
290 ++pLineStart;
291 --dwReadBytes;
294 for (DWORD i = 0; i<dwReadBytes; ++i)
296 if (*pTextBuf == '\r')
298 if ((i + 1) < dwReadBytes)
300 if (*(pTextBuf+1) == '\n')
302 // crlf line ending
303 CString line(pLineStart, pTextBuf-pLineStart);
304 Add(line, EOL_CRLF);
305 pLineStart = pTextBuf+2;
306 ++pTextBuf;
307 ++i;
309 else
311 // cr line ending
312 CString line(pLineStart, pTextBuf-pLineStart);
313 Add(line, EOL_CR);
314 pLineStart =pTextBuf+1;
318 else if (*pTextBuf == '\n')
320 // lf line ending
321 CString line(pLineStart, pTextBuf-pLineStart);
322 Add(line, EOL_LF);
323 pLineStart =pTextBuf+1;
325 ++pTextBuf;
327 if (pLineStart < pTextBuf)
329 CString line(pLineStart, pTextBuf-pLineStart);
330 Add(line, EOL_NOENDING);
331 m_bReturnAtEnd = false;
333 else
334 m_bReturnAtEnd = true;
336 delete [] pFileBuf;
339 return TRUE;
342 void CFileTextLines::StripWhiteSpace(CString& sLine,DWORD dwIgnoreWhitespaces, bool blame)
344 if (blame)
346 if (sLine.GetLength() > 66)
347 sLine = sLine.Mid(66);
349 switch (dwIgnoreWhitespaces)
351 case 0:
352 // Compare whitespaces
353 // do nothing
354 break;
355 case 1:
356 // Ignore all whitespaces
357 sLine.TrimLeft(_T(" \t"));
358 sLine.TrimRight(_T(" \t"));
359 break;
360 case 2:
361 // Ignore leading whitespace
362 sLine.TrimLeft(_T(" \t"));
363 break;
364 case 3:
365 // Ignore ending whitespace
366 sLine.TrimRight(_T(" \t"));
367 break;
371 void CFileTextLines::StripAsciiWhiteSpace(CStringA& sLine,DWORD dwIgnoreWhitespaces, bool blame)
373 if (blame)
375 if (sLine.GetLength() > 66)
376 sLine = sLine.Mid(66);
378 switch (dwIgnoreWhitespaces)
380 case 0: // Compare whitespaces
381 // do nothing
382 break;
383 case 1:
384 // Ignore all whitespaces
385 StripAsciiWhiteSpace(sLine);
386 break;
387 case 2:
388 // Ignore leading whitespace
389 sLine.TrimLeft(" \t");
390 break;
391 case 3:
392 // Ignore leading whitespace
393 sLine.TrimRight(" \t");
394 break;
399 // Fast in-place removal of spaces and tabs from CStringA line
401 void CFileTextLines::StripAsciiWhiteSpace(CStringA& sLine)
403 int outputLen = 0;
404 char* pWriteChr = sLine.GetBuffer(sLine.GetLength());
405 const char* pReadChr = pWriteChr;
406 while(*pReadChr)
408 if(*pReadChr != ' ' && *pReadChr != '\t')
410 *pWriteChr++ = *pReadChr;
411 outputLen++;
413 ++pReadChr;
415 *pWriteChr = '\0';
416 sLine.ReleaseBuffer(outputLen);
419 BOOL CFileTextLines::Save(const CString& sFilePath, bool bSaveAsUTF8, DWORD dwIgnoreWhitespaces /*=0*/, BOOL bIgnoreCase /*= FALSE*/, bool bBlame /*= false*/)
423 CString destPath = sFilePath;
424 // now make sure that the destination directory exists
425 int ind = 0;
426 while (destPath.Find('\\', ind)>=2)
428 if (!PathIsDirectory(destPath.Left(destPath.Find('\\', ind))))
430 if (!CreateDirectory(destPath.Left(destPath.Find('\\', ind)), NULL))
431 return FALSE;
433 ind = destPath.Find('\\', ind)+1;
436 CStdioFile file; // Hugely faster than CFile for big file writes - because it uses buffering
437 if (!file.Open(sFilePath, CFile::modeCreate | CFile::modeWrite | CFile::typeBinary))
439 m_sErrorString.Format(IDS_ERR_FILE_OPEN, (LPCTSTR)sFilePath);
440 return FALSE;
442 if ((!bSaveAsUTF8)&&(m_UnicodeType == CFileTextLines::UNICODE_LE))
444 //first write the BOM
445 UINT16 wBOM = 0xFEFF;
446 file.Write(&wBOM, 2);
447 for (int i=0; i<GetCount(); i++)
449 CString sLine = GetAt(i);
450 EOL ending = GetLineEnding(i);
451 StripWhiteSpace(sLine,dwIgnoreWhitespaces, bBlame);
452 if (bIgnoreCase)
453 sLine = sLine.MakeLower();
454 file.Write((LPCTSTR)sLine, sLine.GetLength()*sizeof(TCHAR));
455 if (ending == EOL_AUTOLINE)
456 ending = m_LineEndings;
457 switch (ending)
459 case EOL_CR:
460 sLine = _T("\x0d");
461 break;
462 case EOL_CRLF:
463 case EOL_AUTOLINE:
464 sLine = _T("\x0d\x0a");
465 break;
466 case EOL_LF:
467 sLine = _T("\x0a");
468 break;
469 case EOL_LFCR:
470 sLine = _T("\x0a\x0d");
471 break;
473 if ((m_bReturnAtEnd)||(i != GetCount()-1))
474 file.Write((LPCTSTR)sLine, sLine.GetLength()*sizeof(TCHAR));
477 else if ((!bSaveAsUTF8)&&((m_UnicodeType == CFileTextLines::ASCII)||(m_UnicodeType == CFileTextLines::AUTOTYPE)))
479 for (int i=0; i< GetCount(); i++)
481 // Copy CString to 8 bit without conversion
482 CString sLineT = GetAt(i);
483 CStringA sLine = CStringA(sLineT);
484 EOL ending = GetLineEnding(i);
486 StripAsciiWhiteSpace(sLine,dwIgnoreWhitespaces, bBlame);
487 if (bIgnoreCase)
488 sLine = sLine.MakeLower();
489 if ((m_bReturnAtEnd)||(i != GetCount()-1))
491 if (ending == EOL_AUTOLINE)
492 ending = m_LineEndings;
493 switch (ending)
495 case EOL_CR:
496 sLine += '\x0d';
497 break;
498 case EOL_CRLF:
499 case EOL_AUTOLINE:
500 sLine.Append("\x0d\x0a", 2);
501 break;
502 case EOL_LF:
503 sLine += '\x0a';
504 break;
505 case EOL_LFCR:
506 sLine.Append("\x0a\x0d", 2);
507 break;
510 file.Write((LPCSTR)sLine, sLine.GetLength());
513 else if ((bSaveAsUTF8)||((m_UnicodeType == CFileTextLines::UTF8BOM)||(m_UnicodeType == CFileTextLines::UTF8)))
515 if (m_UnicodeType == CFileTextLines::UTF8BOM)
517 //first write the BOM
518 UINT16 wBOM = 0xBBEF;
519 file.Write(&wBOM, 2);
520 UINT8 uBOM = 0xBF;
521 file.Write(&uBOM, 1);
523 for (int i=0; i<GetCount(); i++)
525 CStringA sLine = CUnicodeUtils::GetUTF8(GetAt(i));
526 EOL ending = GetLineEnding(i);
527 StripAsciiWhiteSpace(sLine,dwIgnoreWhitespaces, bBlame);
528 if (bIgnoreCase)
529 sLine = sLine.MakeLower();
531 if ((m_bReturnAtEnd)||(i != GetCount()-1))
533 if (ending == EOL_AUTOLINE)
534 ending = m_LineEndings;
535 switch (ending)
537 case EOL_CR:
538 sLine += '\x0d';
539 break;
540 case EOL_CRLF:
541 case EOL_AUTOLINE:
542 sLine.Append("\x0d\x0a",2);
543 break;
544 case EOL_LF:
545 sLine += '\x0a';
546 break;
547 case EOL_LFCR:
548 sLine.Append("\x0a\x0d",2);
549 break;
552 file.Write((LPCSTR)sLine, sLine.GetLength());
555 file.Close();
557 catch (CException * e)
559 e->GetErrorMessage(m_sErrorString.GetBuffer(4096), 4096);
560 m_sErrorString.ReleaseBuffer();
561 e->Delete();
562 return FALSE;
564 return TRUE;
567 void CFileTextLines::SetErrorString()
569 LPVOID lpMsgBuf;
570 FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
571 FORMAT_MESSAGE_FROM_SYSTEM |
572 FORMAT_MESSAGE_IGNORE_INSERTS,
573 NULL,
574 ::GetLastError(),
575 MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language
576 (LPTSTR) &lpMsgBuf,
578 NULL
580 m_sErrorString = (LPCTSTR)lpMsgBuf;
581 LocalFree( lpMsgBuf );
584 void CFileTextLines::CopySettings(CFileTextLines * pFileToCopySettingsTo)
586 if (pFileToCopySettingsTo)
588 pFileToCopySettingsTo->m_UnicodeType = m_UnicodeType;
589 pFileToCopySettingsTo->m_LineEndings = m_LineEndings;