1 // TortoiseMerge - a Diff/Patch program
3 // Copyright (C) 2007-2011 - TortoiseSVN
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License
7 // as published by the Free Software Foundation; either version 2
8 // of the License, or (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software Foundation,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 #include "UnicodeUtils.h"
23 #include ".\filetextlines.h"
24 #include "FormatMessageWrapper.h"
25 #include "SmartHandle.h"
// Default constructor.
// Starts the object in "auto-detect" state: the encoding is AUTOTYPE and the
// line-ending style is EOL_AUTOLINE (the same sentinels Load() resets to
// before it runs detection), and m_bReturnAtEnd starts out false.
27 CFileTextLines::CFileTextLines(void)
28 : m_UnicodeType(CFileTextLines::AUTOTYPE
)
29 , m_LineEndings(EOL_AUTOLINE
)
30 , m_bReturnAtEnd(false)
// Destructor.
// NOTE(review): the body is not part of this listing; nothing can be said
// here about what (if anything) it releases.
34 CFileTextLines::~CFileTextLines(void)
// Classifies the encoding of a raw file buffer.
//
// pBuffer - start of the raw bytes to inspect.
// cb      - number of bytes available at pBuffer (used as the byte bound of
//           the scans below).
//
// Returns one of the UnicodeType values that appear below: BINARY,
// UNICODE_LE, UTF8BOM, UTF8 or ASCII.
//
// NOTE(review): several guard conditions, the pointer advances inside the
// scan loops and the assignments to bNonANSI are on lines that are missing
// from this listing; the notes below describe only what is visible.
38 CFileTextLines::UnicodeType
CFileTextLines::CheckUnicodeType(LPVOID pBuffer
, int cb
)
// Early ASCII result. NOTE(review): the condition guarding this return is
// not visible here - presumably a minimum-size check on cb; confirm.
41 return CFileTextLines::ASCII
;
// pVal16 views the buffer as 16-bit units; pVal8 initially points at the
// byte that follows the first 16-bit unit (i.e. the third byte).
42 UINT16
* pVal16
= (UINT16
*)pBuffer
;
43 UINT8
* pVal8
= (UINT8
*)(pVal16
+1);
44 // scan the whole buffer for a 0x0000 sequence
45 // if found, we assume a binary file
// The scan steps two bytes at a time, so only 16-bit aligned zero words are
// detected, and it stops before the last two bytes of the buffer.
46 for (int i
=0; i
<(cb
-2); i
=i
+2)
48 if (0x0000 == *pVal16
++)
49 return CFileTextLines::BINARY
;
// Rewind to the start of the buffer to look for a byte-order mark.
51 pVal16
= (UINT16
*)pBuffer
;
// 0xFEFF read as a native 16-bit value: on a little-endian host this is the
// byte pair FF FE, the UTF-16 little-endian BOM.
52 if (*pVal16
== 0xFEFF)
53 return CFileTextLines::UNICODE_LE
;
// 0xBBEF read as a native 16-bit value: on a little-endian host this is the
// byte pair EF BB, the first two bytes of the UTF-8 BOM (EF BB BF).
// NOTE(review): the check of the third BOM byte is not visible in this
// listing - presumably done through pVal8; confirm.
56 if (*pVal16
== 0xBBEF)
59 return CFileTextLines::UTF8BOM
;
61 // check for illegal UTF8 chars
// 0xC0, 0xC1 and 0xF5..0xFF never occur in well-formed UTF-8, so a buffer
// containing any of them is reported as ASCII.
// NOTE(review): the pVal8 advance inside this loop is not visible here.
62 pVal8
= (UINT8
*)pBuffer
;
63 for (int i
=0; i
<cb
; ++i
)
65 if ((*pVal8
== 0xC0)||(*pVal8
== 0xC1)||(*pVal8
>= 0xF5))
66 return CFileTextLines::ASCII
;
// Second pass: validate multi-byte sequences. A lead byte must be followed
// by the right number of continuation bytes (bit pattern 10xxxxxx, i.e.
// (b & 0xC0) == 0x80); any violation makes the buffer ASCII.
// The loop stops three bytes before the end of the buffer.
// NOTE(review): the pointer/index advances between the checks, and the
// places where bNonANSI (and any "UTF-8 sequence seen" flag) are set, are
// on lines missing from this listing.
69 pVal8
= (UINT8
*)pBuffer
;
71 bool bNonANSI
= false;
72 for (int i
=0; i
<(cb
-3); ++i
)
// 110xxxxx: lead byte of a two-byte sequence -> one continuation byte.
76 if ((*pVal8
& 0xE0)==0xC0)
79 if ((*pVal8
& 0xC0)!=0x80)
80 return CFileTextLines::ASCII
;
// 1110xxxx: lead byte of a three-byte sequence -> two continuation bytes.
83 if ((*pVal8
& 0xF0)==0xE0)
86 if ((*pVal8
& 0xC0)!=0x80)
87 return CFileTextLines::ASCII
;
89 if ((*pVal8
& 0xC0)!=0x80)
90 return CFileTextLines::ASCII
;
// 11110xxx: lead byte of a four-byte sequence -> three continuation bytes.
93 if ((*pVal8
& 0xF8)==0xF0)
96 if ((*pVal8
& 0xC0)!=0x80)
97 return CFileTextLines::ASCII
;
99 if ((*pVal8
& 0xC0)!=0x80)
100 return CFileTextLines::ASCII
;
102 if ((*pVal8
& 0xC0)!=0x80)
103 return CFileTextLines::ASCII
;
// NOTE(review): the condition guarding this UTF8 result is not visible -
// presumably "at least one valid multi-byte sequence was seen"; confirm.
109 return CFileTextLines::UTF8
;
// No byte flagged as non-ANSI: the buffer is treated as UTF8 only when the
// registry DWORD Software\TortoiseMerge\UseUTF8 (default FALSE) is non-zero.
110 if ((!bNonANSI
)&&(DWORD(CRegDWORD(_T("Software\\TortoiseMerge\\UseUTF8"), FALSE
))))
111 return CFileTextLines::UTF8
;
// Everything else falls back to ASCII.
112 return CFileTextLines::ASCII
;
// Detects the line-ending style used in a raw buffer.
//
// pBuffer - start of the raw bytes to inspect (viewed as char below).
// cb      - number of bytes to scan.
//
// Returns an EOL value; the result variable starts out as EOL_AUTOLINE.
//
// NOTE(review): most of this function's body is missing from this listing -
// the tests on buf[i] that open each branch, the bounds checks in front of
// the i+1 / i+2 look-aheads, and every assignment to retval. Confirm that
// each look-ahead is guarded against cb before relying on this function
// with a buffer that is exactly cb bytes long.
116 EOL
CFileTextLines::CheckLineEndings(LPVOID pBuffer
, int cb
)
118 EOL retval
= EOL_AUTOLINE
;
119 char * buf
= (char *)pBuffer
;
120 for (int i
=0; i
<cb
; i
++)
122 //now search the buffer for line endings
// 0x0d is carriage return (CR), 0x0a is line feed (LF).
// Look-ahead of two bytes for a CR - presumably the 16-bit character case
// where a zero byte sits between the two characters; confirm.
132 if (buf
[i
+2] == 0x0d)
// Look-ahead of one byte for a CR (8-bit character case).
144 else if (buf
[i
+1] == 0x0d)
// Current byte is a CR.
153 else if (buf
[i
] == 0x0d)
// Look-ahead of two bytes for an LF following the CR.
162 if (buf
[i
+2] == 0x0a)
// Look-ahead of one byte for an LF following the CR.
174 else if (buf
[i
+1] == 0x0a)
// Loads a text file, detects its encoding and line-ending style, converts
// the content to wide characters where needed and splits it into lines.
//
// sFilePath  - path of the file to load.
// lengthHint - optional hint (default 0). NOTE(review): its use is not
//              visible in this listing.
//
// Returns BOOL. On the failure paths visible below, m_sErrorString is
// filled in before returning.
//
// NOTE(review): the return statements, the buffer clean-up (delete[]), the
// "file too big" condition and the calls that add the split lines are on
// lines missing from this listing. When reviewing the full file, confirm
// that every early-return path releases pFileBuf / pWideBuf.
187 BOOL
CFileTextLines::Load(const CString
& sFilePath
, int lengthHint
/* = 0*/)
// Reset both settings to their "auto" sentinels so that the detection
// further down runs for this file.
189 m_LineEndings
= EOL_AUTOLINE
;
190 m_UnicodeType
= CFileTextLines::AUTOTYPE
;
// A directory cannot be loaded as a file: report IDS_ERR_FILE_NOTAFILE.
198 if (PathIsDirectory(sFilePath
))
200 m_sErrorString
.Format(IDS_ERR_FILE_NOTAFILE
, (LPCTSTR
)sFilePath
);
204 if (!PathFileExists(sFilePath
))
206 //file does not exist, so just return SUCCESS
// Open the existing file for reading, allowing other readers.
210 CAutoFile hFile
= CreateFile(sFilePath
, GENERIC_READ
, FILE_SHARE_READ
, NULL
, OPEN_EXISTING
, NULL
, NULL
);
218 if (!GetFileSizeEx(hFile
, &fsize
))
225 // file is way too big for us
226 m_sErrorString
.LoadString(IDS_ERR_FILE_TOOBIG
);
230 // If new[] was done for type T delete[] must be called on a pointer of type T*,
231 // otherwise the behavior is undefined.
232 // +1 is to address possible truncation when integer division is done
// Only fsize.LowPart (the low 32 bits of the size) is used for the
// allocation and for the read below.
233 wchar_t* pFileBuf
= new wchar_t[fsize
.LowPart
/sizeof(wchar_t) + 1];
// dwReadBytes receives the number of bytes actually read.
234 DWORD dwReadBytes
= 0;
235 if (!ReadFile(hFile
, pFileBuf
, fsize
.LowPart
, &dwReadBytes
, NULL
))
// Detect the encoding from the raw bytes.
241 if (m_UnicodeType
== CFileTextLines::AUTOTYPE
)
243 m_UnicodeType
= this->CheckUnicodeType(pFileBuf
, dwReadBytes
);
// Detect the line-ending style from at most the first 10000 bytes.
245 if (m_LineEndings
== EOL_AUTOLINE
)
247 m_LineEndings
= CheckLineEndings(pFileBuf
, min(10000, dwReadBytes
));
// Binary content is rejected with IDS_ERR_FILE_BINARY.
251 if (m_UnicodeType
== CFileTextLines::BINARY
)
253 m_sErrorString
.Format(IDS_ERR_FILE_BINARY
, (LPCTSTR
)sFilePath
);
258 // we may have to convert the file content
// UTF-8 (with or without BOM): MultiByteToWideChar is called twice - first
// with a NULL output buffer and size 0 to obtain the required number of
// wide characters, then again to convert into a buffer of that size.
259 if ((m_UnicodeType
== UTF8
)||(m_UnicodeType
== UTF8BOM
))
261 int ret
= MultiByteToWideChar(CP_UTF8
, 0, (LPCSTR
)pFileBuf
, dwReadBytes
, NULL
, 0);
262 wchar_t * pWideBuf
= new wchar_t[ret
];
263 int ret2
= MultiByteToWideChar(CP_UTF8
, 0, (LPCSTR
)pFileBuf
, dwReadBytes
, pWideBuf
, ret
);
// ASCII: same two-step conversion, using the system ANSI code page
// (CP_ACP) with MB_PRECOMPOSED.
// NOTE(review): what happens with pWideBuf / ret2 after each conversion
// (buffer swap, count update, error handling) is not visible here.
272 else if (m_UnicodeType
== ASCII
)
274 int ret
= MultiByteToWideChar(CP_ACP
, MB_PRECOMPOSED
, (LPCSTR
)pFileBuf
, dwReadBytes
, NULL
, 0);
275 wchar_t * pWideBuf
= new wchar_t[ret
];
276 int ret2
= MultiByteToWideChar(CP_ACP
, MB_PRECOMPOSED
, (LPCSTR
)pFileBuf
, dwReadBytes
, pWideBuf
, ret
);
286 // fill in the lines into the array
// pTextBuf is the scan position, pLineStart the first character of the
// line currently being collected.
287 wchar_t * pTextBuf
= pFileBuf
;
288 wchar_t * pLineStart
= pFileBuf
;
// NOTE(review): the statement under this test is not visible - per the
// comment below it presumably turns the byte count into a character count.
289 if (m_UnicodeType
== UNICODE_LE
)
291 // UTF16 have two bytes per char
// NOTE(review): the statements under this test are not visible -
// presumably they step over the byte-order mark; confirm.
294 if ((m_UnicodeType
== UTF8BOM
)||(m_UnicodeType
== UNICODE_LE
))
// Walk the text one wide character at a time. The loop bound is
// dwReadBytes, which at this point must therefore hold a character count.
// NOTE(review): the pTextBuf advance and the calls that store each line
// with its EOL type are on lines missing from this listing.
302 for (DWORD i
= 0; i
<dwReadBytes
; ++i
)
304 if (*pTextBuf
== '\r')
// Only peek at the next character when there is one.
306 if ((i
+ 1) < dwReadBytes
)
// CR followed by LF: the line ends before the CR and the next line
// starts after both characters.
308 if (*(pTextBuf
+1) == '\n')
311 CString
line(pLineStart
, (int)(pTextBuf
-pLineStart
));
313 pLineStart
= pTextBuf
+2;
// CR without a following LF: the next line starts right after the CR.
320 CString
line(pLineStart
, (int)(pTextBuf
-pLineStart
));
322 pLineStart
=pTextBuf
+1;
// LF on its own: the next line starts right after it.
326 else if (*pTextBuf
== '\n')
329 CString
line(pLineStart
, (int)(pTextBuf
-pLineStart
));
331 pLineStart
=pTextBuf
+1;
// Text left over after the last line ending: store it as a final line
// without an ending and remember that the file does not end with one.
335 if (pLineStart
< pTextBuf
)
337 CString
line(pLineStart
, (int)(pTextBuf
-pLineStart
));
338 Add(line
, EOL_NOENDING
);
339 m_bReturnAtEnd
= false;
// NOTE(review): presumably the else branch of the test above (the file
// ended with a line ending); the else itself is not visible.
342 m_bReturnAtEnd
= true;
// Applies the selected whitespace handling to one wide-character line,
// modifying sLine in place.
//
// sLine               - line to modify.
// dwIgnoreWhitespaces - selects the whitespace mode (see the switch below).
// blame               - NOTE(review): its use is not visible in this
//                       listing; presumably it guards the 66-character cut.
//
// NOTE(review): the case labels of the switch are missing from this
// listing; only the per-case comments and statements are visible.
349 void CFileTextLines::StripWhiteSpace(CString
& sLine
,DWORD dwIgnoreWhitespaces
, bool blame
)
// Drop the first 66 characters of lines longer than that (presumably the
// blame information column - confirm).
353 if (sLine
.GetLength() > 66)
354 sLine
= sLine
.Mid(66);
356 switch (dwIgnoreWhitespaces
)
359 // Compare whitespaces
363 // Ignore all whitespaces
// NOTE(review): only leading and trailing spaces/tabs are removed here,
// whereas the CStringA overload calls StripAsciiWhiteSpace(sLine) for the
// same case - confirm whether this difference is intended.
364 sLine
.TrimLeft(_T(" \t"));
365 sLine
.TrimRight(_T(" \t"));
368 // Ignore leading whitespace
369 sLine
.TrimLeft(_T(" \t"));
372 // Ignore ending whitespace
373 sLine
.TrimRight(_T(" \t"));
// 8-bit counterpart of StripWhiteSpace: applies the selected whitespace
// handling to one CStringA line, modifying sLine in place.
//
// sLine               - line to modify.
// dwIgnoreWhitespaces - selects the whitespace mode (see the switch below).
// blame               - NOTE(review): its use is not visible in this
//                       listing; presumably it guards the 66-character cut.
//
// NOTE(review): apart from "case 0", the case labels of the switch are
// missing from this listing.
378 void CFileTextLines::StripAsciiWhiteSpace(CStringA
& sLine
,DWORD dwIgnoreWhitespaces
, bool blame
)
// Drop the first 66 characters of lines longer than that (presumably the
// blame information column - confirm).
382 if (sLine
.GetLength() > 66)
383 sLine
= sLine
.Mid(66);
385 switch (dwIgnoreWhitespaces
)
387 case 0: // Compare whitespaces
391 // Ignore all whitespaces
// Delegates to the single-argument overload.
392 StripAsciiWhiteSpace(sLine
);
395 // Ignore leading whitespace
396 sLine
.TrimLeft(" \t");
399 // Ignore ending whitespace
400 sLine
.TrimRight(" \t");
406 // Fast in-place removal of spaces and tabs from CStringA line
//
// sLine - line to filter; its own buffer is used for both reading and
//         writing, so no second string is allocated.
//
// NOTE(review): the declaration and update of outputLen, the loop header,
// the read-pointer advance and any terminator write are on lines missing
// from this listing.
408 void CFileTextLines::StripAsciiWhiteSpace(CStringA
& sLine
)
// The write pointer and the read pointer start at the same position in the
// string's buffer.
411 char* pWriteChr
= sLine
.GetBuffer(sLine
.GetLength());
412 const char* pReadChr
= pWriteChr
;
// Characters other than space and tab are copied down to the write
// position, which then advances; spaces and tabs are skipped.
415 if(*pReadChr
!= ' ' && *pReadChr
!= '\t')
417 *pWriteChr
++ = *pReadChr
;
// Hand the buffer back to the string with outputLen as its new length.
423 sLine
.ReleaseBuffer(outputLen
);
// Writes all lines to a file, in one of three encodings.
//
// sFilePath           - destination path; missing parent directories are
//                       created first.
// bSaveAsUTF8         - when true, the UTF-8 branch is used whatever
//                       m_UnicodeType says.
// dwIgnoreWhitespaces - whitespace mode passed to StripWhiteSpace /
//                       StripAsciiWhiteSpace for every line (default 0).
// bIgnoreCase         - default FALSE. NOTE(review): its use is not visible
//                       in this listing; presumably it guards the
//                       MakeLower() calls.
// bBlame              - passed through to the strip functions.
//
// Returns BOOL. On the failure paths visible below, m_sErrorString is set.
//
// Branch selection:
//   !bSaveAsUTF8 and UNICODE_LE           -> UTF-16 with BOM
//   !bSaveAsUTF8 and ASCII or AUTOTYPE    -> 8-bit via CStringA
//   bSaveAsUTF8, or UTF8BOM / UTF8        -> UTF-8 via CUnicodeUtils::GetUTF8
//
// NOTE(review): the try statement, the switch/case labels that choose the
// line-ending text, the remaining line-ending assignments and all return
// statements are on lines missing from this listing.
426 BOOL
CFileTextLines::Save(const CString
& sFilePath
, bool bSaveAsUTF8
, DWORD dwIgnoreWhitespaces
/*=0*/, BOOL bIgnoreCase
/*= FALSE*/, bool bBlame
/*= false*/)
430 CString destPath
= sFilePath
;
431 // now make sure that the destination directory exists
// Walk the path one backslash at a time; each prefix that is not yet a
// directory is created. NOTE(review): the initial value of ind and the
// handling of a failed CreateDirectory are not visible here.
433 while (destPath
.Find('\\', ind
)>=2)
435 if (!PathIsDirectory(destPath
.Left(destPath
.Find('\\', ind
))))
437 if (!CreateDirectory(destPath
.Left(destPath
.Find('\\', ind
)), NULL
))
// Continue the search after the backslash that was just handled.
440 ind
= destPath
.Find('\\', ind
)+1;
443 CStdioFile file
; // Hugely faster than CFile for big file writes - because it uses buffering
// Create (or truncate) the file for writing in binary mode, so the bytes
// written below are not translated.
444 if (!file
.Open(sFilePath
, CFile::modeCreate
| CFile::modeWrite
| CFile::typeBinary
))
446 m_sErrorString
.Format(IDS_ERR_FILE_OPEN
, (LPCTSTR
)sFilePath
);
// --- UTF-16 branch -------------------------------------------------------
449 if ((!bSaveAsUTF8
)&&(m_UnicodeType
== CFileTextLines::UNICODE_LE
))
451 //first write the BOM
// Written as a native 16-bit value (bytes FF FE on a little-endian host).
452 UINT16 wBOM
= 0xFEFF;
453 file
.Write(&wBOM
, 2);
454 for (int i
=0; i
<GetCount(); i
++)
456 CString sLine
= GetAt(i
);
457 EOL ending
= GetLineEnding(i
);
458 StripWhiteSpace(sLine
,dwIgnoreWhitespaces
, bBlame
);
// NOTE(review): the condition guarding this lower-casing is not visible.
460 sLine
= sLine
.MakeLower();
// The line text is written as raw TCHAR data.
461 file
.Write((LPCTSTR
)sLine
, sLine
.GetLength()*sizeof(TCHAR
));
// A line without its own ending type uses the file-wide setting.
462 if (ending
== EOL_AUTOLINE
)
463 ending
= m_LineEndings
;
// sLine is reused to hold the line-ending text: CR LF ...
471 sLine
= _T("\x0d\x0a");
// ... or LF CR.
477 sLine
= _T("\x0a\x0d");
// The ending of the last line is only written when m_bReturnAtEnd is set.
483 if ((m_bReturnAtEnd
)||(i
!= GetCount()-1))
484 file
.Write((LPCTSTR
)sLine
, sLine
.GetLength()*sizeof(TCHAR
));
// --- 8-bit branch --------------------------------------------------------
487 else if ((!bSaveAsUTF8
)&&((m_UnicodeType
== CFileTextLines::ASCII
)||(m_UnicodeType
== CFileTextLines::AUTOTYPE
)))
489 for (int i
=0; i
< GetCount(); i
++)
491 // Copy CString to 8 bit without conversion
// NOTE(review): if CString is a wide string in this build, constructing a
// CStringA from it performs a code-page conversion rather than a plain
// copy - confirm the comment above against the build configuration.
492 CString sLineT
= GetAt(i
);
493 CStringA sLine
= CStringA(sLineT
);
494 EOL ending
= GetLineEnding(i
);
496 StripAsciiWhiteSpace(sLine
,dwIgnoreWhitespaces
, bBlame
);
// NOTE(review): the condition guarding this lower-casing is not visible.
498 sLine
= sLine
.MakeLower();
// The ending of the last line is only appended when m_bReturnAtEnd is set.
499 if ((m_bReturnAtEnd
)||(i
!= GetCount()-1))
501 if (ending
== EOL_AUTOLINE
)
502 ending
= m_LineEndings
;
// Unlike the UTF-16 branch, the ending is appended to the line (CR LF ...
510 sLine
.Append("\x0d\x0a", 2);
// ... or LF CR) so that text and ending go out in a single write.
516 sLine
.Append("\x0a\x0d", 2);
520 file
.Write((LPCSTR
)sLine
, sLine
.GetLength());
// --- UTF-8 branch --------------------------------------------------------
523 else if ((bSaveAsUTF8
)||((m_UnicodeType
== CFileTextLines::UTF8BOM
)||(m_UnicodeType
== CFileTextLines::UTF8
)))
// A BOM is only written when the file was detected as UTF8BOM.
525 if (m_UnicodeType
== CFileTextLines::UTF8BOM
)
527 //first write the BOM
// The three-byte BOM is written in two steps: 0xBBEF as a native 16-bit
// value (bytes EF BB on a little-endian host), then one more byte.
// NOTE(review): the declaration of uBOM is not visible - presumably 0xBF.
528 UINT16 wBOM
= 0xBBEF;
529 file
.Write(&wBOM
, 2);
531 file
.Write(&uBOM
, 1);
533 for (int i
=0; i
<GetCount(); i
++)
535 CStringA sLine
= CUnicodeUtils::GetUTF8(GetAt(i
));
536 EOL ending
= GetLineEnding(i
);
537 StripAsciiWhiteSpace(sLine
,dwIgnoreWhitespaces
, bBlame
);
// NOTE(review): lower-casing happens after the UTF-8 encoding, so it works
// on encoded bytes - confirm this is intended for non-ASCII text. The
// guarding condition is not visible.
539 sLine
= sLine
.MakeLower();
// The ending of the last line is only appended when m_bReturnAtEnd is set.
541 if ((m_bReturnAtEnd
)||(i
!= GetCount()-1))
543 if (ending
== EOL_AUTOLINE
)
544 ending
= m_LineEndings
;
// CR LF ...
552 sLine
.Append("\x0d\x0a",2);
// ... or LF CR.
558 sLine
.Append("\x0a\x0d",2);
562 file
.Write((LPCSTR
)sLine
, sLine
.GetLength());
// MFC exceptions raised by the file operations end up here: the message
// text (at most 4096 characters) is copied into m_sErrorString.
// NOTE(review): disposal of the exception object is not visible here.
567 catch (CException
* e
)
569 e
->GetErrorMessage(m_sErrorString
.GetBuffer(4096), 4096);
570 m_sErrorString
.ReleaseBuffer();
// Stores the text produced by a default-constructed CFormatMessageWrapper
// in m_sErrorString.
// NOTE(review): presumably this is the message for the calling thread's
// last Win32 error - confirm against FormatMessageWrapper.h.
577 void CFileTextLines::SetErrorString()
579 m_sErrorString
= CFormatMessageWrapper();
// Copies this object's file settings - encoding type, line-ending style and
// the "ends with a line ending" flag - to another CFileTextLines object.
//
// pFileToCopySettingsTo - target object; a null pointer is ignored.
//
// Only these three members are copied by the visible statements.
582 void CFileTextLines::CopySettings(CFileTextLines
* pFileToCopySettingsTo
)
// Nothing to do when no target was supplied.
584 if (pFileToCopySettingsTo
)
586 pFileToCopySettingsTo
->m_UnicodeType
= m_UnicodeType
;
587 pFileToCopySettingsTo
->m_LineEndings
= m_LineEndings
;
588 pFileToCopySettingsTo
->m_bReturnAtEnd
= m_bReturnAtEnd
;