1 // TortoiseMerge - a Diff/Patch program
3 // Copyright (C) 2007-2009 - TortoiseSVN
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License
7 // as published by the Free Software Foundation; either version 2
8 // of the License, or (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software Foundation,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 #include "UnicodeUtils.h"
23 #include ".\filetextlines.h"
// Default constructor (takes no arguments).
26 CFileTextLines::CFileTextLines(void)
// Destructor.
30 CFileTextLines::~CFileTextLines(void)
// Guesses the encoding of a raw file buffer.
//   pBuffer - start of the data to inspect.
//   cb      - size of the data; the byte-wise loops below use it as a byte count.
// Returns one of the UnicodeType values. The exits visible here are
// ASCII, BINARY, UNICODE_LE, UTF8BOM and UTF8.
34 CFileTextLines::UnicodeType
CFileTextLines::CheckUnicodeType(LPVOID pBuffer
, int cb
)
// NOTE(review): the condition guarding this early exit is not visible here;
// presumably a minimum-size check on cb - confirm.
37 return CFileTextLines::ASCII
;
// pVal views the buffer as 16-bit units.
38 UINT16
* pVal
= (UINT16
*)pBuffer
;
// pVal2 views it as bytes, starting right after the first 16-bit unit
// (i.e. at byte offset 2).
39 UINT8
* pVal2
= (UINT8
*)(pVal
+1);
40 // scan the whole buffer for a 0x0000 sequence
41 // if found, we assume a binary file
// The scan advances one aligned 16-bit unit (two bytes) per iteration and
// stops while i < cb-2, so zero bytes that straddle two units are not seen.
42 for (int i
=0; i
<(cb
-2); i
=i
+2)
44 if (0x0000 == *pVal
++)
45 return CFileTextLines::BINARY
;
// Rewind to the start of the buffer for the byte-order-mark tests.
47 pVal
= (UINT16
*)pBuffer
;
// NOTE(review): the tests guarding the next two returns are not visible here;
// presumably they compare the leading bytes against the UTF-16LE and UTF-8
// byte order marks - confirm.
49 return CFileTextLines::UNICODE_LE
;
55 return CFileTextLines::UTF8BOM
;
57 // check for illegal UTF8 chars
58 pVal2
= (UINT8
*)pBuffer
;
59 for (int i
=0; i
<cb
; ++i
)
// 0xC0, 0xC1 and 0xF5..0xFF never occur in well-formed UTF-8, so their
// presence means the data is treated as plain 8-bit text.
61 if ((*pVal2
== 0xC0)||(*pVal2
== 0xC1)||(*pVal2
>= 0xF5))
62 return CFileTextLines::ASCII
;
// Second pass from the start: validate the multi-byte sequences.
65 pVal2
= (UINT8
*)pBuffer
;
// NOTE(review): bNonANSI is only read in the final decision below; the place
// where it is set is not visible here. Presumably it records that at least
// one multi-byte sequence was seen - confirm.
67 bool bNonANSI
= false;
// Stops three bytes before the end. The visible checks read the current byte
// only; the pointer advances between the checks are not visible here.
68 for (int i
=0; i
<(cb
-3); ++i
)
// 110xxxxx: lead byte of a two-byte sequence ...
72 if ((*pVal2
& 0xE0)==0xC0)
// ... which must be followed by one continuation byte (10xxxxxx).
75 if ((*pVal2
& 0xC0)!=0x80)
76 return CFileTextLines::ASCII
;
// 1110xxxx: lead byte of a three-byte sequence, two continuation bytes
// are required.
79 if ((*pVal2
& 0xF0)==0xE0)
82 if ((*pVal2
& 0xC0)!=0x80)
83 return CFileTextLines::ASCII
;
85 if ((*pVal2
& 0xC0)!=0x80)
86 return CFileTextLines::ASCII
;
// 11110xxx: lead byte of a four-byte sequence, three continuation bytes
// are required.
89 if ((*pVal2
& 0xF8)==0xF0)
92 if ((*pVal2
& 0xC0)!=0x80)
93 return CFileTextLines::ASCII
;
95 if ((*pVal2
& 0xC0)!=0x80)
96 return CFileTextLines::ASCII
;
98 if ((*pVal2
& 0xC0)!=0x80)
99 return CFileTextLines::ASCII
;
// NOTE(review): the condition for this return is not visible here; presumably
// it is taken when a valid multi-byte sequence was found - confirm.
105 return CFileTextLines::UTF8
;
// When bNonANSI is still false, the registry value
// Software\TortoiseMerge\UseUTF8 (default FALSE) decides: a non-zero value
// makes the buffer count as UTF8.
106 if ((!bNonANSI
)&&(DWORD(CRegDWORD(_T("Software\\TortoiseMerge\\UseUTF8"), FALSE
))))
107 return CFileTextLines::UTF8
;
// Everything else is treated as plain 8-bit text.
108 return CFileTextLines::ASCII
;
// Scans the first cb bytes of pBuffer for line-ending bytes (0x0d = CR,
// 0x0a = LF) to determine the EOL style of the data.
//   pBuffer - start of the data to inspect.
//   cb      - number of bytes to look at.
// NOTE(review): the assignments to retval and the return statement are not
// visible here; presumably retval stays EOL_AUTOLINE when no line ending is
// found - confirm.
112 EOL
CFileTextLines::CheckLineEndings(LPVOID pBuffer
, int cb
)
114 EOL retval
= EOL_AUTOLINE
;
// The buffer is walked byte by byte, whatever the real encoding is.
115 char * buf
= (char *)pBuffer
;
116 for (int i
=0; i
<cb
; i
++)
118 //now search the buffer for line endings
// NOTE(review): the look-aheads at i+1 and i+2 read past index i while the
// loop runs up to cb-1. The bounds checks that must guard them are not
// visible here - confirm they exist. The two offsets presumably cover the
// partner character in 16-bit text (i+2) and in 8-bit text (i+1).
128 if (buf
[i
+2] == 0x0d)
140 else if (buf
[i
+1] == 0x0d)
149 else if (buf
[i
] == 0x0d)
158 if (buf
[i
+2] == 0x0a)
170 else if (buf
[i
+1] == 0x0a)
// Reads the file at sFilePath, detects its encoding and line endings,
// converts the content to wide characters where needed and splits it into
// lines.
//   sFilePath  - path of the file to load.
//   lengthHint - defaults to 0 (see the signature comment); its use is not
//                visible here.
// On failure an explanatory text is stored in m_sErrorString.
183 BOOL
CFileTextLines::Load(const CString
& sFilePath
, int lengthHint
/* = 0*/)
// Start from "auto detect" for both the line endings and the encoding.
185 m_LineEndings
= EOL_AUTOLINE
;
186 m_UnicodeType
= CFileTextLines::AUTOTYPE
;
// A directory cannot be loaded as a text file.
194 if (PathIsDirectory(sFilePath
))
196 m_sErrorString
.Format(IDS_ERR_FILE_NOTAFILE
, (LPCTSTR
)sFilePath
);
200 if (!PathFileExists(sFilePath
))
202 //file does not exist, so just return SUCCESS
// Open the existing file for reading; other readers are allowed meanwhile.
206 HANDLE hFile
= CreateFile(sFilePath
, GENERIC_READ
, FILE_SHARE_READ
, NULL
, OPEN_EXISTING
, NULL
, NULL
);
207 if (hFile
== INVALID_HANDLE_VALUE
)
214 if (!GetFileSizeEx(hFile
, &fsize
))
222 // file is way too big for us
224 m_sErrorString
.LoadString(IDS_ERR_FILE_TOOBIG
);
// The whole file is read into one heap buffer sized by the low 32 bits of
// the file size.
// NOTE(review): raw new[]; the matching delete[] is not visible here -
// confirm every exit path below releases the buffer and closes hFile.
228 LPVOID pFileBuf
= new BYTE
[fsize
.LowPart
];
229 DWORD dwReadBytes
= 0;
230 if (!ReadFile(hFile
, pFileBuf
, fsize
.LowPart
, &dwReadBytes
, NULL
))
// Detect the encoding only if it is still undecided.
236 if (m_UnicodeType
== CFileTextLines::AUTOTYPE
)
238 m_UnicodeType
= this->CheckUnicodeType(pFileBuf
, dwReadBytes
);
// Same for the line endings; at most the first 10000 bytes are inspected.
240 if (m_LineEndings
== EOL_AUTOLINE
)
242 m_LineEndings
= CheckLineEndings(pFileBuf
, min(10000, dwReadBytes
));
// Binary files are refused.
246 if (m_UnicodeType
== CFileTextLines::BINARY
)
248 m_sErrorString
.Format(IDS_ERR_FILE_BINARY
, (LPCTSTR
)sFilePath
);
253 // we may have to convert the file content
254 if ((m_UnicodeType
== UTF8
)||(m_UnicodeType
== UTF8BOM
))
// First call (no output buffer) asks for the required number of wide
// characters, the second call performs the UTF-8 conversion.
256 int ret
= MultiByteToWideChar(CP_UTF8
, 0, (LPCSTR
)pFileBuf
, dwReadBytes
, NULL
, 0);
257 wchar_t * pWideBuf
= new wchar_t[ret
];
258 int ret2
= MultiByteToWideChar(CP_UTF8
, 0, (LPCSTR
)pFileBuf
, dwReadBytes
, pWideBuf
, ret
);
266 else if (m_UnicodeType
== ASCII
)
// Same two-step conversion, using the system ANSI code page.
268 int ret
= MultiByteToWideChar(CP_ACP
, MB_PRECOMPOSED
, (LPCSTR
)pFileBuf
, dwReadBytes
, NULL
, 0);
269 wchar_t * pWideBuf
= new wchar_t[ret
];
270 int ret2
= MultiByteToWideChar(CP_ACP
, MB_PRECOMPOSED
, (LPCSTR
)pFileBuf
, dwReadBytes
, pWideBuf
, ret
);
// NOTE(review): the lines that consume pWideBuf / ret2 are not visible here.
// The code below reads pFileBuf as wide characters, so presumably pFileBuf
// is replaced by the converted buffer and dwReadBytes by the converted
// length - confirm.
278 // fill in the lines into the array
// pTextBuf is the scan position, pLineStart the start of the current line.
279 wchar_t * pTextBuf
= (wchar_t *)pFileBuf
;
280 wchar_t * pLineStart
= (wchar_t *)pFileBuf
;
281 if (m_UnicodeType
== UNICODE_LE
)
283 // UTF16 have two bytes per char
// NOTE(review): the body of this branch is not visible here; presumably it
// skips the byte order mark at the start of the buffer - confirm.
286 if ((m_UnicodeType
== UTF8BOM
)||(m_UnicodeType
== UNICODE_LE
))
// Walk the text one wide character at a time and cut a line at each
// line ending.
// NOTE(review): dwReadBytes is used as a character count here - see the
// note above the "fill in the lines" comment.
294 for (DWORD i
= 0; i
<dwReadBytes
; ++i
)
296 if (*pTextBuf
== '\r')
// Only peek at the next character if it is still inside the buffer.
298 if ((i
+ 1) < dwReadBytes
)
300 if (*(pTextBuf
+1) == '\n')
// CR LF pair: the line ends before the CR and the next line starts after
// the LF.
303 CString
line(pLineStart
, pTextBuf
-pLineStart
);
305 pLineStart
= pTextBuf
+2;
// CR alone: the next line starts right after it.
312 CString
line(pLineStart
, pTextBuf
-pLineStart
);
314 pLineStart
=pTextBuf
+1;
318 else if (*pTextBuf
== '\n')
// LF alone: the next line starts right after it.
321 CString
line(pLineStart
, pTextBuf
-pLineStart
);
323 pLineStart
=pTextBuf
+1;
// Text left over after the last line ending becomes a final line without
// an ending, and the file is recorded as not ending with a line break.
327 if (pLineStart
< pTextBuf
)
329 CString
line(pLineStart
, pTextBuf
-pLineStart
);
330 Add(line
, EOL_NOENDING
);
331 m_bReturnAtEnd
= false;
// NOTE(review): presumably the else branch of the test above - confirm.
334 m_bReturnAtEnd
= true;
// Applies the selected whitespace handling to one line, in place.
//   sLine               - the line to modify.
//   dwIgnoreWhitespaces - selects the mode; the case labels are not visible
//                         here, the comments below name the modes.
//   blame               - NOTE(review): presumably guards the 66-character
//                         cut below (blame output prefix) - the guarding
//                         condition is not visible here, confirm.
342 void CFileTextLines::StripWhiteSpace(CString
& sLine
,DWORD dwIgnoreWhitespaces
, bool blame
)
// Drop the first 66 characters of lines longer than that.
346 if (sLine
.GetLength() > 66)
347 sLine
= sLine
.Mid(66);
349 switch (dwIgnoreWhitespaces
)
352 // Compare whitespaces
356 // Ignore all whitespaces
// NOTE(review): this trims spaces and tabs at both ends only, whereas the
// CStringA variant of this mode removes every space and tab in the line.
357 sLine
.TrimLeft(_T(" \t"));
358 sLine
.TrimRight(_T(" \t"));
361 // Ignore leading whitespace
362 sLine
.TrimLeft(_T(" \t"));
365 // Ignore ending whitespace
366 sLine
.TrimRight(_T(" \t"));
// 8-bit counterpart of StripWhiteSpace: applies the selected whitespace
// handling to one CStringA line, in place.
//   sLine               - the line to modify.
//   dwIgnoreWhitespaces - selects the mode; only "case 0" is visible here,
//                         the comments below name the other modes.
//   blame               - NOTE(review): presumably guards the 66-character
//                         cut below - the guarding condition is not visible
//                         here, confirm.
371 void CFileTextLines::StripAsciiWhiteSpace(CStringA
& sLine
,DWORD dwIgnoreWhitespaces
, bool blame
)
// Drop the first 66 characters of lines longer than that.
375 if (sLine
.GetLength() > 66)
376 sLine
= sLine
.Mid(66);
378 switch (dwIgnoreWhitespaces
)
380 case 0: // Compare whitespaces
384 // Ignore all whitespaces
// Delegates to the one-argument overload.
385 StripAsciiWhiteSpace(sLine
);
388 // Ignore leading whitespace
389 sLine
.TrimLeft(" \t");
392 // Ignore ending whitespace
393 sLine
.TrimRight(" \t");
399 // Fast in-place removal of spaces and tabs from CStringA line
//   sLine - the line to compact; it is modified through its own buffer.
401 void CFileTextLines::StripAsciiWhiteSpace(CStringA
& sLine
)
// The read and write cursors start at the same position in the string's
// buffer. The write cursor only advances when a character is kept.
404 char* pWriteChr
= sLine
.GetBuffer(sLine
.GetLength());
405 const char* pReadChr
= pWriteChr
;
// NOTE(review): the loop header and the advance of pReadChr are not visible
// here.
// Keep every character that is neither a space nor a tab.
408 if(*pReadChr
!= ' ' && *pReadChr
!= '\t')
410 *pWriteChr
++ = *pReadChr
;
// Hand the buffer back with the new length.
// NOTE(review): the declaration of outputLen is not visible here; presumably
// it counts the characters kept - confirm.
416 sLine
.ReleaseBuffer(outputLen
);
// Writes all lines to sFilePath, creating missing parent directories first.
//   sFilePath           - destination file; it is created or overwritten.
//   bSaveAsUTF8         - forces UTF-8 output regardless of m_UnicodeType.
//   dwIgnoreWhitespaces - whitespace mode passed to the Strip*WhiteSpace
//                         helpers for every line (default 0).
//   bIgnoreCase         - default FALSE. NOTE(review): presumably guards the
//                         MakeLower() calls below - the guarding conditions
//                         are not visible here, confirm.
//   bBlame              - passed through to the Strip*WhiteSpace helpers
//                         (default false).
// On failure an explanatory text is stored in m_sErrorString.
419 BOOL
CFileTextLines::Save(const CString
& sFilePath
, bool bSaveAsUTF8
, DWORD dwIgnoreWhitespaces
/*=0*/, BOOL bIgnoreCase
/*= FALSE*/, bool bBlame
/*= false*/)
423 CString destPath
= sFilePath
;
424 // now make sure that the destination directory exists
// Walk the path from one backslash to the next. Backslashes before index 2
// end the walk. Every path prefix that is not yet a directory is created.
// NOTE(review): the declaration of ind is not visible here.
426 while (destPath
.Find('\\', ind
)>=2)
428 if (!PathIsDirectory(destPath
.Left(destPath
.Find('\\', ind
))))
430 if (!CreateDirectory(destPath
.Left(destPath
.Find('\\', ind
)), NULL
))
// Continue the search after the backslash just handled.
433 ind
= destPath
.Find('\\', ind
)+1;
436 CStdioFile file
; // Hugely faster than CFile for big file writes - because it uses buffering
// Binary mode, so the line endings are written exactly as chosen below.
437 if (!file
.Open(sFilePath
, CFile::modeCreate
| CFile::modeWrite
| CFile::typeBinary
))
439 m_sErrorString
.Format(IDS_ERR_FILE_OPEN
, (LPCTSTR
)sFilePath
);
// --- Output as UTF-16 (UNICODE_LE) ---
442 if ((!bSaveAsUTF8
)&&(m_UnicodeType
== CFileTextLines::UNICODE_LE
))
444 //first write the BOM
445 UINT16 wBOM
= 0xFEFF;
446 file
.Write(&wBOM
, 2);
447 for (int i
=0; i
<GetCount(); i
++)
449 CString sLine
= GetAt(i
);
450 EOL ending
= GetLineEnding(i
);
451 StripWhiteSpace(sLine
,dwIgnoreWhitespaces
, bBlame
);
453 sLine
= sLine
.MakeLower();
// The line text is written as raw TCHAR data.
454 file
.Write((LPCTSTR
)sLine
, sLine
.GetLength()*sizeof(TCHAR
));
// Lines without an explicit ending use the file-wide setting.
455 if (ending
== EOL_AUTOLINE
)
456 ending
= m_LineEndings
;
// NOTE(review): the switch over 'ending' is not visible here; two of its
// assignments are: CR LF ...
464 sLine
= _T("\x0d\x0a");
// ... and LF CR.
470 sLine
= _T("\x0a\x0d");
// The ending of the very last line is only written if the file ended with
// a line break.
473 if ((m_bReturnAtEnd
)||(i
!= GetCount()-1))
474 file
.Write((LPCTSTR
)sLine
, sLine
.GetLength()*sizeof(TCHAR
));
// --- Output as 8-bit text (ASCII or still undetermined type) ---
477 else if ((!bSaveAsUTF8
)&&((m_UnicodeType
== CFileTextLines::ASCII
)||(m_UnicodeType
== CFileTextLines::AUTOTYPE
)))
479 for (int i
=0; i
< GetCount(); i
++)
481 // Copy CString to 8 bit without conversion
482 CString sLineT
= GetAt(i
);
483 CStringA sLine
= CStringA(sLineT
);
484 EOL ending
= GetLineEnding(i
);
486 StripAsciiWhiteSpace(sLine
,dwIgnoreWhitespaces
, bBlame
);
488 sLine
= sLine
.MakeLower();
// The line ending is appended to the line itself before the single write
// below, again skipping it for a last line without a trailing line break.
489 if ((m_bReturnAtEnd
)||(i
!= GetCount()-1))
491 if (ending
== EOL_AUTOLINE
)
492 ending
= m_LineEndings
;
500 sLine
.Append("\x0d\x0a", 2);
506 sLine
.Append("\x0a\x0d", 2);
510 file
.Write((LPCSTR
)sLine
, sLine
.GetLength());
// --- Output as UTF-8 (forced, or because the file was UTF-8) ---
513 else if ((bSaveAsUTF8
)||((m_UnicodeType
== CFileTextLines::UTF8BOM
)||(m_UnicodeType
== CFileTextLines::UTF8
)))
// A byte order mark is only written if the loaded file had one.
515 if (m_UnicodeType
== CFileTextLines::UTF8BOM
)
517 //first write the BOM
// 0xBBEF stored as a 16-bit value is written as the bytes EF BB (assumes a
// little-endian host). A third byte follows from uBOM.
// NOTE(review): the declaration of uBOM is not visible here; presumably it
// is 0xBF, completing the UTF-8 BOM EF BB BF - confirm.
518 UINT16 wBOM
= 0xBBEF;
519 file
.Write(&wBOM
, 2);
521 file
.Write(&uBOM
, 1);
523 for (int i
=0; i
<GetCount(); i
++)
// Each line is converted to UTF-8 before it is stripped and written.
525 CStringA sLine
= CUnicodeUtils::GetUTF8(GetAt(i
));
526 EOL ending
= GetLineEnding(i
);
527 StripAsciiWhiteSpace(sLine
,dwIgnoreWhitespaces
, bBlame
);
529 sLine
= sLine
.MakeLower();
531 if ((m_bReturnAtEnd
)||(i
!= GetCount()-1))
533 if (ending
== EOL_AUTOLINE
)
534 ending
= m_LineEndings
;
542 sLine
.Append("\x0d\x0a",2);
548 sLine
.Append("\x0a\x0d",2);
552 file
.Write((LPCSTR
)sLine
, sLine
.GetLength());
// MFC exceptions are turned into an error text of at most 4096 characters
// in m_sErrorString.
// NOTE(review): the matching try and the disposal of the exception object
// are not visible here.
557 catch (CException
* e
)
559 e
->GetErrorMessage(m_sErrorString
.GetBuffer(4096), 4096);
560 m_sErrorString
.ReleaseBuffer();
// Stores a system-provided message text in m_sErrorString.
// NOTE(review): the message-id argument of FormatMessage is not visible
// here; presumably it is the calling thread's last error code - confirm.
567 void CFileTextLines::SetErrorString()
// The flags ask the system to allocate the message buffer, to look the text
// up in the system message table, and to leave insert sequences unexpanded.
570 FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER
|
571 FORMAT_MESSAGE_FROM_SYSTEM
|
572 FORMAT_MESSAGE_IGNORE_INSERTS
,
575 MAKELANGID(LANG_NEUTRAL
, SUBLANG_DEFAULT
), // Default language
// Copy the text into the member, then release the system-allocated buffer.
580 m_sErrorString
= (LPCTSTR
)lpMsgBuf
;
581 LocalFree( lpMsgBuf
);
// Copies this object's encoding type and line-ending style to another
// CFileTextLines instance.
//   pFileToCopySettingsTo - receiver of the settings; a null pointer is
//                           accepted and leaves nothing to do.
584 void CFileTextLines::CopySettings(CFileTextLines
* pFileToCopySettingsTo
)
586 if (pFileToCopySettingsTo
)
588 pFileToCopySettingsTo
->m_UnicodeType
= m_UnicodeType
;
589 pFileToCopySettingsTo
->m_LineEndings
= m_LineEndings
;