Remove always-true if-clause
[TortoiseGit.git] / src / TortoiseMerge / FileTextLines.cpp
blobb80a8ef7dd0f1045c83cb3d2b5bf27fbe60c87e8
1 // TortoiseGitMerge - a Diff/Patch program
3 // Copyright (C) 2007-2016 - TortoiseSVN
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License
7 // as published by the Free Software Foundation; either version 2
8 // of the License, or (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software Foundation,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#include "stdafx.h"
#include "resource.h"
#include "UnicodeUtils.h"
#include "registry.h"
#include "FileTextLines.h"
#include "FormatMessageWrapper.h"
#include "SmartHandle.h"
#include <memory>
/**
 * Exchanges the two bytes of a wide character (UTF-16 byte order flip).
 * NOTE(review): written for a 16-bit wchar_t (Windows); with a wider
 * wchar_t the bits shifted above bit 15 are not discarded.
 */
inline wchar_t WideCharSwap(wchar_t nValue)
{
	const auto highToLow = nValue >> 8;
	const auto lowToHigh = nValue << 8;
	return static_cast<wchar_t>(highToLow | lowToHigh);
}
33 UINT64 inline WordSwapBytes(UINT64 nValue)
35 return ((nValue&0xff00ff00ff00ff)<<8) | ((nValue>>8)&0xff00ff00ff00ff); // swap BYTESs in WORDs
38 UINT32 inline DwordSwapBytes(UINT32 nValue)
40 UINT32 nRet = (nValue<<16) | (nValue>>16); // swap WORDs
41 nRet = ((nRet&0xff00ff)<<8) | ((nRet>>8)&0xff00ff); // swap BYTESs in WORDs
42 return nRet;
43 //return _byteswap_ulong(nValue);
46 UINT64 inline DwordSwapBytes(UINT64 nValue)
48 UINT64 nRet = ((nValue&0xffff0000ffffL)<<16) | ((nValue>>16)&0xffff0000ffffL); // swap WORDs in DWORDs
49 nRet = ((nRet&0xff00ff00ff00ff)<<8) | ((nRet>>8)&0xff00ff00ff00ff); // swap BYTESs in WORDs
50 return nRet;
53 CFileTextLines::CFileTextLines(void)
54 : m_bNeedsConversion(false)
55 , m_bKeepEncoding(false)
57 m_SaveParams.m_UnicodeType = CFileTextLines::AUTOTYPE;
58 m_SaveParams.m_LineEndings = EOL_AUTOLINE;
61 CFileTextLines::~CFileTextLines(void)
65 CFileTextLines::UnicodeType CFileTextLines::CheckUnicodeType(LPVOID pBuffer, int cb)
67 if (cb < 2)
68 return CFileTextLines::ASCII;
69 const UINT32 * const pVal32 = (UINT32 *)pBuffer;
70 const UINT16 * const pVal16 = (UINT16 *)pBuffer;
71 const UINT8 * const pVal8 = (UINT8 *)pBuffer;
72 // scan the whole buffer for a 0x00000000 sequence
73 // if found, we assume a binary file
74 int nDwords = cb/4;
75 for (int j=0; j<nDwords; ++j)
77 if (0x00000000 == pVal32[j])
78 return CFileTextLines::BINARY;
80 if (cb >=4 )
82 if (*pVal32 == 0x0000FEFF)
84 return CFileTextLines::UTF32_LE;
86 if (*pVal32 == 0xFFFE0000)
88 return CFileTextLines::UTF32_BE;
91 if (*pVal16 == 0xFEFF)
93 return CFileTextLines::UTF16_LEBOM;
95 if (*pVal16 == 0xFFFE)
97 return CFileTextLines::UTF16_BEBOM;
99 if (cb < 3)
100 return CFileTextLines::ASCII;
101 if (*pVal16 == 0xBBEF)
103 if (pVal8[2] == 0xBF)
104 return CFileTextLines::UTF8BOM;
106 // check for illegal UTF8 sequences
107 bool bNonANSI = false;
108 int nNeedData = 0;
109 int i=0;
110 int nullcount = 0;
111 for (; i < cb; ++i)
113 if (pVal8[i] == 0)
115 ++nullcount;
116 // count the null chars, we do not want to treat an ASCII/UTF8 file
117 // as UTF16 just because of some null chars that might be accidentally
118 // in the file.
119 // Use an arbitrary value of one fiftieth of the file length as
120 // the limit after which a file is considered UTF16.
121 if (nullcount >(cb / 50))
123 // null-chars are not allowed for ASCII or UTF8, that means
124 // this file is most likely UTF16 encoded
125 if (i % 2)
126 return CFileTextLines::UTF16_LE;
127 else
128 return CFileTextLines::UTF16_BE;
131 if ((pVal8[i] & 0x80) != 0) // non ASCII
133 bNonANSI = true;
134 break;
137 // check remaining text for UTF-8 validity
138 for (; i<cb; ++i)
140 UINT8 zChar = pVal8[i];
141 if ((zChar & 0x80)==0) // Ascii
143 if (zChar == 0)
145 ++nullcount;
146 // count the null chars, we do not want to treat an ASCII/UTF8 file
147 // as UTF16 just because of some null chars that might be accidentally
148 // in the file.
149 // Use an arbitrary value of one fiftieth of the file length as
150 // the limit after which a file is considered UTF16.
151 if (nullcount > (cb / 50))
153 // null-chars are not allowed for ASCII or UTF8, that means
154 // this file is most likely UTF16 encoded
155 if (i%2)
156 return CFileTextLines::UTF16_LE;
157 else
158 return CFileTextLines::UTF16_BE;
160 nNeedData = 0;
162 else if (nNeedData)
164 return CFileTextLines::ASCII;
166 continue;
168 if ((zChar & 0x40)==0) // top bit
170 if (!nNeedData)
171 return CFileTextLines::ASCII;
172 --nNeedData;
174 else if (nNeedData)
176 return CFileTextLines::ASCII;
178 else if ((zChar & 0x20)==0) // top two bits
180 if (zChar<=0xC1)
181 return CFileTextLines::ASCII;
182 nNeedData = 1;
184 else if ((zChar & 0x10)==0) // top three bits
186 nNeedData = 2;
188 else if ((zChar & 0x08)==0) // top four bits
190 if (zChar>=0xf5)
191 return CFileTextLines::ASCII;
192 nNeedData = 3;
194 else
195 return CFileTextLines::ASCII;
197 if (bNonANSI && nNeedData==0)
198 // if get here thru nonAscii and no missing data left then its valid UTF8
199 return CFileTextLines::UTF8;
200 if ((!bNonANSI)&&(DWORD(CRegDWORD(_T("Software\\TortoiseGitMerge\\UseUTF8"), FALSE))))
201 return CFileTextLines::UTF8;
202 return CFileTextLines::ASCII;
206 BOOL CFileTextLines::Load(const CString& sFilePath, int lengthHint /* = 0*/)
208 WCHAR exceptionError[1000] = {0};
209 m_SaveParams.m_LineEndings = EOL_AUTOLINE;
210 if (!m_bKeepEncoding)
211 m_SaveParams.m_UnicodeType = CFileTextLines::AUTOTYPE;
212 RemoveAll();
213 if(lengthHint != 0)
215 Reserve(lengthHint);
218 if (PathIsDirectory(sFilePath))
220 m_sErrorString.Format(IDS_ERR_FILE_NOTAFILE, (LPCTSTR)sFilePath);
221 return FALSE;
224 if (!PathFileExists(sFilePath))
226 //file does not exist, so just return SUCCESS
227 return TRUE;
230 CAutoFile hFile = CreateFile(sFilePath, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_DELETE | FILE_SHARE_WRITE, nullptr, OPEN_EXISTING, 0, nullptr);
231 if (!hFile)
233 SetErrorString();
234 return FALSE;
237 LARGE_INTEGER fsize;
238 if (!GetFileSizeEx(hFile, &fsize))
240 SetErrorString();
241 return FALSE;
243 if (fsize.HighPart)
245 // file is way too big for us
246 m_sErrorString.LoadString(IDS_ERR_FILE_TOOBIG);
247 return FALSE;
250 // create buffer
251 // If new[] was done for type T delete[] must be called on a pointer of type T*,
252 // otherwise the behavior is undefined.
253 // +1 is to address possible truncation when integer division is done
254 CBuffer oFile;
257 oFile.SetLength(fsize.LowPart);
259 catch (CMemoryException* e)
261 e->GetErrorMessage(exceptionError, _countof(exceptionError));
262 m_sErrorString = exceptionError;
263 return FALSE;
266 // load file
267 DWORD dwReadBytes = 0;
268 if (!ReadFile(hFile, (void *)oFile, fsize.LowPart, &dwReadBytes, NULL))
270 SetErrorString();
271 return FALSE;
273 hFile.CloseHandle();
275 // detect type
276 if (m_SaveParams.m_UnicodeType == CFileTextLines::AUTOTYPE)
278 m_SaveParams.m_UnicodeType = this->CheckUnicodeType((LPVOID)oFile, dwReadBytes);
280 // enforce conversion for all but ASCII and UTF8 type
281 m_bNeedsConversion = (m_SaveParams.m_UnicodeType != CFileTextLines::UTF8) && (m_SaveParams.m_UnicodeType != CFileTextLines::ASCII);
283 // we may have to convert the file content - CString is UTF16LE
286 CBaseFilter * pFilter = NULL;
287 switch (m_SaveParams.m_UnicodeType)
289 case BINARY:
290 m_sErrorString.Format(IDS_ERR_FILE_BINARY, (LPCTSTR)sFilePath);
291 return FALSE;
292 case UTF8:
293 case UTF8BOM:
294 pFilter = new CUtf8Filter(NULL);
295 break;
296 default:
297 case ASCII:
298 pFilter = new CAsciiFilter(NULL);
299 break;
300 case UTF16_BE:
301 case UTF16_BEBOM:
302 pFilter = new CUtf16beFilter(NULL);
303 break;
304 case UTF16_LE:
305 case UTF16_LEBOM:
306 pFilter = new CUtf16leFilter(NULL);
307 break;
308 case UTF32_BE:
309 pFilter = new CUtf32beFilter(NULL);
310 break;
311 case UTF32_LE:
312 pFilter = new CUtf32leFilter(NULL);
313 break;
315 pFilter->Decode(oFile);
316 delete pFilter;
318 catch (CMemoryException* e)
320 e->GetErrorMessage(exceptionError, _countof(exceptionError));
321 m_sErrorString = exceptionError;
322 return FALSE;
325 int nReadChars=oFile.GetLength()/sizeof(wchar_t);
326 wchar_t * pTextBuf = (wchar_t *)oFile;
327 wchar_t * pLineStart = pTextBuf;
328 if ((m_SaveParams.m_UnicodeType == UTF8BOM)
329 || (m_SaveParams.m_UnicodeType == UTF16_LEBOM)
330 || (m_SaveParams.m_UnicodeType == UTF16_BEBOM)
331 || (m_SaveParams.m_UnicodeType == UTF32_LE)
332 || (m_SaveParams.m_UnicodeType == UTF32_BE))
334 // ignore the BOM
335 ++pTextBuf;
336 ++pLineStart;
337 --nReadChars;
340 // fill in the lines into the array
341 size_t countEOLs[EOL__COUNT] = { 0 };
342 CFileTextLine oTextLine;
343 for (int i = nReadChars; i; --i)
345 EOL eEol;
346 switch (*pTextBuf++)
348 case '\r':
349 // crlf line ending or cr line ending
350 eEol = ((i > 1) && *(pTextBuf) == '\n') ? EOL_CRLF : EOL_CR;
351 break;
352 case '\n':
353 // lfcr line ending or lf line ending
354 eEol = ((i > 1) && *(pTextBuf) == '\r') ? EOL_LFCR : EOL_LF;
355 if (eEol == EOL_LFCR)
357 // LFCR is very rare on Windows, so we have to double check
358 // that this is not just a LF followed by CRLF
359 if (((countEOLs[EOL_CRLF] > 1) || (countEOLs[EOL_LF] > 1) || (GetCount() < 2)) &&
360 ((i > 2) && (*(pTextBuf+1) == '\n')))
362 // change the EOL back to a simple LF
363 eEol = EOL_LF;
366 break;
367 case 0x000b:
368 eEol = EOL_VT;
369 break;
370 case 0x000c:
371 eEol = EOL_FF;
372 break;
373 case 0x0085:
374 eEol = EOL_NEL;
375 break;
376 case 0x2028:
377 eEol = EOL_LS;
378 break;
379 case 0x2029:
380 eEol = EOL_PS;
381 break;
382 default:
383 continue;
385 oTextLine.sLine = CString(pLineStart, (int)(pTextBuf-pLineStart)-1);
386 oTextLine.eEnding = eEol;
387 CStdFileLineArray::Add(oTextLine);
388 ++countEOLs[eEol];
389 if (eEol==EOL_CRLF || eEol==EOL_LFCR)
391 ++pTextBuf;
392 --i;
394 pLineStart = pTextBuf;
396 CString line(pLineStart, (int)(pTextBuf-pLineStart));
397 Add(line, EOL_NOENDING);
399 // some EOLs are not supported by the svn diff lib.
400 m_bNeedsConversion |= (countEOLs[EOL_CRLF]!=0);
401 m_bNeedsConversion |= (countEOLs[EOL_FF]!=0);
402 m_bNeedsConversion |= (countEOLs[EOL_VT]!=0);
403 m_bNeedsConversion |= (countEOLs[EOL_NEL]!=0);
404 m_bNeedsConversion |= (countEOLs[EOL_LS]!=0);
405 m_bNeedsConversion |= (countEOLs[EOL_PS]!=0);
407 size_t eolmax = 0;
408 for (int nEol = 0; nEol<EOL__COUNT; nEol++)
410 if (eolmax < countEOLs[nEol])
412 eolmax = countEOLs[nEol];
413 m_SaveParams.m_LineEndings = (EOL)nEol;
417 return TRUE;
420 void CFileTextLines::StripWhiteSpace(CString& sLine, DWORD dwIgnoreWhitespaces, bool blame)
422 if (blame)
424 if (sLine.GetLength() > 66)
425 sLine = sLine.Mid(66);
427 switch (dwIgnoreWhitespaces)
429 case 0:
430 // Compare whitespaces
431 // do nothing
432 break;
433 case 1:
434 // Ignore all whitespaces
435 sLine.TrimLeft(_T(" \t"));
436 sLine.TrimRight(_T(" \t"));
437 break;
438 case 2:
439 // Ignore leading whitespace
440 sLine.TrimLeft(_T(" \t"));
441 break;
442 case 3:
443 // Ignore ending whitespace
444 sLine.TrimRight(_T(" \t"));
445 break;
450 Encoding pattern:
451 - encode & save BOM
452 - Get Line
453 - modify line - whitespaces, lowercase
454 - encode & save line
455 - get cached encoded eol
456 - save eol
458 BOOL CFileTextLines::Save( const CString& sFilePath
459 , bool bSaveAsUTF8 /*= false */
460 , bool bUseSVNCompatibleEOLs /*= false */
461 , DWORD dwIgnoreWhitespaces /*= 0 */
462 , BOOL bIgnoreCase /*= FALSE */
463 , bool bBlame /*= false*/
464 , bool bIgnoreComments /*= false*/
465 , const CString& linestart /*= CString()*/
466 , const CString& blockstart /*= CString()*/
467 , const CString& blockend /*= CString()*/
468 , const std::wregex& rx /*= std::wregex(L"")*/
469 , const std::wstring& replacement /*=L""*/)
471 m_sCommentLine = linestart;
472 m_sCommentBlockStart = blockstart;
473 m_sCommentBlockEnd = blockend;
477 CString destPath = sFilePath;
478 // now make sure that the destination directory exists
479 int ind = 0;
480 while (destPath.Find('\\', ind)>=2)
482 if (!PathIsDirectory(destPath.Left(destPath.Find('\\', ind))))
484 if (!CreateDirectory(destPath.Left(destPath.Find('\\', ind)), NULL))
485 return FALSE;
487 ind = destPath.Find('\\', ind)+1;
490 CStdioFile file; // Hugely faster than CFile for big file writes - because it uses buffering
491 if (!file.Open(sFilePath, CFile::modeCreate | CFile::modeWrite | CFile::typeBinary | CFile::shareDenyNone))
493 const_cast<CString *>(&m_sErrorString)->Format(IDS_ERR_FILE_OPEN, (LPCTSTR)sFilePath);
494 return FALSE;
497 CBaseFilter * pFilter = NULL;
498 bool bSaveBom = true;
499 CFileTextLines::UnicodeType eUnicodeType = bSaveAsUTF8 ? CFileTextLines::UTF8 : m_SaveParams.m_UnicodeType;
500 switch (eUnicodeType)
502 default:
503 case CFileTextLines::ASCII:
504 bSaveBom = false;
505 pFilter = new CAsciiFilter(&file);
506 break;
507 case CFileTextLines::UTF8:
508 bSaveBom = false;
509 case CFileTextLines::UTF8BOM:
510 pFilter = new CUtf8Filter(&file);
511 break;
512 case CFileTextLines::UTF16_BE:
513 bSaveBom = false;
514 pFilter = new CUtf16beFilter(&file);
515 break;
516 case CFileTextLines::UTF16_BEBOM:
517 pFilter = new CUtf16beFilter(&file);
518 break;
519 case CFileTextLines::UTF16_LE:
520 bSaveBom = false;
521 pFilter = new CUtf16leFilter(&file);
522 break;
523 case CFileTextLines::UTF16_LEBOM:
524 pFilter = new CUtf16leFilter(&file);
525 break;
526 case CFileTextLines::UTF32_BE:
527 pFilter = new CUtf32beFilter(&file);
528 break;
529 case CFileTextLines::UTF32_LE:
530 pFilter = new CUtf32leFilter(&file);
531 break;
534 if (bSaveBom)
536 //first write the BOM
537 pFilter->Write(L"\xfeff");
539 // cache EOLs
540 CBuffer oEncodedEol[EOL__COUNT];
541 oEncodedEol[EOL_LF] = pFilter->Encode(_T("\n")); // x0a
542 oEncodedEol[EOL_CR] = pFilter->Encode(_T("\r")); // x0d
543 oEncodedEol[EOL_CRLF] = pFilter->Encode(_T("\r\n")); // x0d x0a
544 if (bUseSVNCompatibleEOLs)
546 // when using EOLs that are supported by the svn lib,
547 // we have to use the same EOLs as the file has in case
548 // they're already supported, but a different supported one
549 // in case the original one isn't supported.
550 // Only this way the option "ignore EOLs (recommended)" unchecked
551 // actually shows the lines as different.
552 // However, the diff won't find and differences in EOLs
553 // for these special EOLs if they differ between those special ones
554 // listed below.
555 // But it will work properly for the most common EOLs LF/CR/CRLF.
556 oEncodedEol[EOL_LFCR] = oEncodedEol[EOL_CR];
557 for (int nEol = 0; nEol<EOL_NOENDING; nEol++)
559 if (oEncodedEol[nEol].IsEmpty())
560 oEncodedEol[nEol] = oEncodedEol[EOL_LF];
563 else
565 oEncodedEol[EOL_LFCR] = pFilter->Encode(_T("\n\r"));
566 oEncodedEol[EOL_VT] = pFilter->Encode(_T("\v")); // x0b
567 oEncodedEol[EOL_FF] = pFilter->Encode(_T("\f")); // x0c
568 oEncodedEol[EOL_NEL] = pFilter->Encode(_T("\x85"));
569 oEncodedEol[EOL_LS] = pFilter->Encode(_T("\x2028"));
570 oEncodedEol[EOL_PS] = pFilter->Encode(_T("\x2029"));
572 oEncodedEol[EOL_AUTOLINE] = oEncodedEol[m_SaveParams.m_LineEndings==EOL_AUTOLINE
573 ? EOL_CRLF
574 : m_SaveParams.m_LineEndings];
576 bool bInBlockComment = false;
577 for (int i=0; i<GetCount(); i++)
579 CString sLineT = GetAt(i);
580 if (bIgnoreComments)
581 bInBlockComment = StripComments(sLineT, bInBlockComment);
582 if (!rx._Empty())
583 LineRegex(sLineT, rx, replacement);
584 StripWhiteSpace(sLineT, dwIgnoreWhitespaces, bBlame);
585 if (bIgnoreCase)
586 sLineT = sLineT.MakeLower();
587 pFilter->Write(sLineT);
588 EOL eEol = GetLineEnding(i);
589 pFilter->Write(oEncodedEol[eEol]);
591 delete pFilter;
592 file.Close();
594 catch (CException * e)
596 CString * psErrorString = const_cast<CString *>(&m_sErrorString);
597 e->GetErrorMessage(psErrorString->GetBuffer(4096), 4096);
598 psErrorString->ReleaseBuffer();
599 e->Delete();
600 return FALSE;
602 return TRUE;
605 void CFileTextLines::SetErrorString()
607 m_sErrorString = CFormatMessageWrapper();
610 void CFileTextLines::CopySettings(CFileTextLines * pFileToCopySettingsTo) const
612 if (pFileToCopySettingsTo)
614 pFileToCopySettingsTo->m_SaveParams = m_SaveParams;
618 const wchar_t * CFileTextLines::GetEncodingName(UnicodeType eEncoding)
620 switch (eEncoding)
622 case ASCII:
623 return L"ASCII";
624 case BINARY:
625 return L"BINARY";
626 case UTF16_LE:
627 return L"UTF-16LE";
628 case UTF16_LEBOM:
629 return L"UTF-16LE BOM";
630 case UTF16_BE:
631 return L"UTF-16BE";
632 case UTF16_BEBOM:
633 return L"UTF-16BE BOM";
634 case UTF32_LE:
635 return L"UTF-32LE";
636 case UTF32_BE:
637 return L"UTF-32BE";
638 case UTF8:
639 return L"UTF-8";
640 case UTF8BOM:
641 return L"UTF-8 BOM";
643 return L"";
646 bool CFileTextLines::StripComments( CString& sLine, bool bInBlockComment )
648 int startpos = 0;
652 if (bInBlockComment)
654 int endpos = sLine.Find(m_sCommentBlockEnd);
655 if (endpos >= 0)
657 sLine = sLine.Left(startpos) + sLine.Mid(endpos+m_sCommentBlockEnd.GetLength());
658 bInBlockComment = false;
660 else
662 sLine = sLine.Left(startpos);
663 startpos = -1;
666 if (!bInBlockComment)
668 startpos = m_sCommentBlockStart.IsEmpty() ? -1 : sLine.Find(m_sCommentBlockStart);
669 int startpos2 = m_sCommentLine.IsEmpty() ? -1 : sLine.Find(m_sCommentLine);
670 if ( ((startpos2 < startpos) && (startpos2 >= 0)) ||
671 ((startpos2 >= 0) && (startpos < 0)) )
673 // line comment
674 // look if there's a string marker (" or ') before that
675 // note: this check is not fully correct. For example, it
676 // does not account for escaped chars or even multiline strings.
677 // but it has to be fast, so this has to do...
678 int scount = 0;
679 int ccount = 0;
680 auto spos = sLine.Find('"');
681 while ((spos >= 0) && (spos < startpos2))
683 ++scount;
684 spos = sLine.Find('"', spos + 1);
686 auto cpos = sLine.Find('\'');
687 while ((cpos >= 0) && (cpos < startpos2))
689 ++ccount;
690 cpos = sLine.Find('"', cpos + 1);
692 if ((scount % 2 == 0) && (ccount % 2 == 0))
694 // line comment, erase the rest of the line
695 sLine = sLine.Left(startpos2);
696 startpos = -1;
699 else if (startpos >= 0)
701 // starting block comment
702 bInBlockComment = true;
705 } while (startpos >= 0);
707 return bInBlockComment;
710 void CFileTextLines::LineRegex( CString& sLine, const std::wregex& rx, const std::wstring& replacement ) const
712 std::wstring str = (LPCTSTR)sLine;
713 std::wstring str2 = std::regex_replace(str, rx, replacement);
714 sLine = str2.c_str();
718 void CBuffer::ExpandToAtLeast(int nNewSize)
720 if (nNewSize>m_nAllocated)
722 delete [] m_pBuffer; // we don't preserve buffer content intentionally
723 nNewSize+=2048-1;
724 nNewSize&=~(1024-1);
725 m_pBuffer=new BYTE[nNewSize];
726 m_nAllocated=nNewSize;
730 void CBuffer::SetLength(int nUsed)
732 ExpandToAtLeast(nUsed);
733 m_nUsed = nUsed;
736 void CBuffer::Swap(CBuffer & Src)
738 std::swap(Src.m_nAllocated, m_nAllocated);
739 std::swap(Src.m_pBuffer, m_pBuffer);
740 std::swap(Src.m_nUsed, m_nUsed);
743 void CBuffer::Copy(const CBuffer & Src)
745 if (&Src != this)
747 SetLength(Src.m_nUsed);
748 memcpy(m_pBuffer, Src.m_pBuffer, m_nUsed);
754 bool CBaseFilter::Decode(/*in out*/ CBuffer & data)
756 int nFlags = (m_nCodePage==CP_ACP) ? MB_PRECOMPOSED : 0;
757 // dry decode is around 8 times faster then real one, alternatively we can set buffer to max length
758 int nReadChars = MultiByteToWideChar(m_nCodePage, nFlags, (LPCSTR)data, data.GetLength(), NULL, 0);
759 m_oBuffer.SetLength(nReadChars*sizeof(wchar_t));
760 int ret2 = MultiByteToWideChar(m_nCodePage, nFlags, (LPCSTR)data, data.GetLength(), (LPWSTR)(void *)m_oBuffer, nReadChars);
761 if (ret2 != nReadChars)
763 return FALSE;
765 data.Swap(m_oBuffer);
766 return TRUE;
769 const CBuffer & CBaseFilter::Encode(const CString s)
771 m_oBuffer.SetLength(s.GetLength()*3+1); // set buffer to guessed max size
772 int nConvertedLen = WideCharToMultiByte(m_nCodePage, 0, (LPCTSTR)s, s.GetLength(), (LPSTR)m_oBuffer, m_oBuffer.GetLength(), NULL, NULL);
773 m_oBuffer.SetLength(nConvertedLen); // set buffer to used size
774 return m_oBuffer;
779 bool CUtf16leFilter::Decode(/*in out*/ CBuffer & /*data*/)
781 // we believe data is ok for use
782 return TRUE;
785 const CBuffer & CUtf16leFilter::Encode(const CString s)
787 int nNeedBytes = s.GetLength()*sizeof(TCHAR);
788 m_oBuffer.SetLength(nNeedBytes);
789 memcpy((void *)m_oBuffer, (LPCTSTR)s, nNeedBytes);
790 return m_oBuffer;
795 bool CUtf16beFilter::Decode(/*in out*/ CBuffer & data)
797 int nNeedBytes = data.GetLength();
798 // make in place WORD BYTEs swap
799 UINT64 * p_qw = (UINT64 *)(void *)data;
800 int nQwords = nNeedBytes/8;
801 for (int nQword = 0; nQword<nQwords; nQword++)
803 p_qw[nQword] = WordSwapBytes(p_qw[nQword]);
805 wchar_t * p_w = (wchar_t *)p_qw;
806 int nWords = nNeedBytes/2;
807 for (int nWord = nQwords*4; nWord<nWords; nWord++)
809 p_w[nWord] = WideCharSwap(p_w[nWord]);
811 return CUtf16leFilter::Decode(data);
814 const CBuffer & CUtf16beFilter::Encode(const CString s)
816 int nNeedBytes = s.GetLength()*sizeof(TCHAR);
817 m_oBuffer.SetLength(nNeedBytes);
818 // copy swaping BYTE order in WORDs
819 const UINT64 * p_qwIn = (const UINT64 *)(LPCTSTR)s;
820 UINT64 * p_qwOut = (UINT64 *)(void *)m_oBuffer;
821 int nQwords = nNeedBytes/8;
822 for (int nQword = 0; nQword<nQwords; nQword++)
824 p_qwOut[nQword] = WordSwapBytes(p_qwIn[nQword]);
826 wchar_t * p_wIn = (wchar_t *)p_qwIn;
827 wchar_t * p_wOut = (wchar_t *)p_qwOut;
828 int nWords = nNeedBytes/2;
829 for (int nWord = nQwords*4; nWord<nWords; nWord++)
831 p_wOut[nWord] = WideCharSwap(p_wIn[nWord]);
833 return m_oBuffer;
838 bool CUtf32leFilter::Decode(/*in out*/ CBuffer & data)
840 // UTF32 have four bytes per char
841 int nReadChars = data.GetLength()/4;
842 UINT32 * p32 = (UINT32 *)(void *)data;
844 // count chars which needs surrogate pair
845 int nSurrogatePairCount = 0;
846 for (int i = 0; i<nReadChars; ++i)
848 if (p32[i]<0x110000 && p32[i]>=0x10000)
850 ++nSurrogatePairCount;
854 // fill buffer
855 m_oBuffer.SetLength((nReadChars+nSurrogatePairCount)*sizeof(wchar_t));
856 wchar_t * pOut = (wchar_t *)m_oBuffer;
857 for (int i = 0; i<nReadChars; ++i, ++pOut)
859 UINT32 zChar = p32[i];
860 if (zChar>=0x110000)
862 *pOut=0xfffd; // ? mark
864 else if (zChar>=0x10000)
866 zChar-=0x10000;
867 pOut[0] = ((zChar>>10)&0x3ff) | 0xd800; // lead surrogate
868 pOut[1] = (zChar&0x7ff) | 0xdc00; // trail surrogate
869 pOut++;
871 else
873 *pOut = (wchar_t)zChar;
876 data.Swap(m_oBuffer);
877 return TRUE;
880 const CBuffer & CUtf32leFilter::Encode(const CString s)
882 int nInWords = s.GetLength();
883 m_oBuffer.SetLength(nInWords*2);
885 LPCTSTR p_In = (LPCTSTR)s;
886 UINT32 * p_Out = (UINT32 *)(void *)m_oBuffer;
887 int nOutDword = 0;
888 for (int nInWord = 0; nInWord<nInWords; nInWord++, nOutDword++)
890 UINT32 zChar = p_In[nInWord];
891 if ((zChar&0xfc00) == 0xd800) // lead surrogate
893 if (nInWord+1<nInWords && (p_In[nInWord+1]&0xfc00) == 0xdc00) // trail surrogate follows
895 zChar = 0x10000 + ((zChar&0x3ff)<<10) + (p_In[++nInWord]&0x3ff);
897 else
899 zChar = 0xfffd; // ? mark
902 else if ((zChar&0xfc00) == 0xdc00) // trail surrogate without lead
904 zChar = 0xfffd; // ? mark
906 p_Out[nOutDword] = zChar;
908 m_oBuffer.SetLength(nOutDword*4); // store length reduced by surrogates
909 return m_oBuffer;
914 bool CUtf32beFilter::Decode(/*in out*/ CBuffer & data)
917 // swap BYTEs order in DWORDs
918 UINT64 * p64 = (UINT64 *)(void *)data;
919 int nQwords = data.GetLength()/8;
920 for (int nQword = 0; nQword<nQwords; nQword++)
922 p64[nQword] = DwordSwapBytes(p64[nQword]);
925 UINT32 * p32 = (UINT32 *)p64;
926 int nDwords = data.GetLength()/4;
927 for (int nDword = nQwords*2; nDword<nDwords; nDword++)
929 p32[nDword] = DwordSwapBytes(p32[nDword]);
931 return CUtf32leFilter::Decode(data);
934 const CBuffer & CUtf32beFilter::Encode(const CString s)
936 CUtf32leFilter::Encode(s);
938 // swap BYTEs order in DWORDs
939 UINT64 * p64 = (UINT64 *)(void *)m_oBuffer;
940 int nQwords = m_oBuffer.GetLength()/8;
941 for (int nQword = 0; nQword<nQwords; nQword++)
943 p64[nQword] = DwordSwapBytes(p64[nQword]);
946 UINT32 * p32 = (UINT32 *)p64;
947 int nDwords = m_oBuffer.GetLength()/4;
948 for (int nDword = nQwords*2; nDword<nDwords; nDword++)
950 p32[nDword] = DwordSwapBytes(p32[nDword]);
952 return m_oBuffer;