Fixed issue #1667: Ability to ignore a folder in commit dialog
[TortoiseGit.git] / src / TortoiseMerge / FileTextLines.cpp
blobe7adc1b0ed033c4a21c52091c1ace3eb21df5c2e
1 // TortoiseGitMerge - a Diff/Patch program
3 // Copyright (C) 2007-2012 - TortoiseSVN
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License
7 // as published by the Free Software Foundation; either version 2
8 // of the License, or (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software Foundation,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 #include "stdafx.h"
20 #include "resource.h"
21 #include "UnicodeUtils.h"
22 #include "registry.h"
23 #include "filetextlines.h"
24 #include "FormatMessageWrapper.h"
25 #include "SmartHandle.h"
/**
 * Swaps the two bytes of a UTF-16 code unit (big endian <-> little endian).
 * Only the low 16 bits take part in the swap; both halves are masked
 * explicitly so the result does not depend on the width or signedness
 * of wchar_t.
 */
wchar_t inline WideCharSwap(wchar_t nValue)
{
    return static_cast<wchar_t>(((nValue >> 8) & 0x00ff) | ((nValue & 0x00ff) << 8));
    //return _byteswap_ushort(nValue);
}
33 UINT64 inline WordSwapBytes(UINT64 nValue)
35 return ((nValue&0xff00ff00ff00ff)<<8) | ((nValue>>8)&0xff00ff00ff00ff); // swap BYTESs in WORDs
38 UINT32 inline DwordSwapBytes(UINT32 nValue)
40 UINT32 nRet = (nValue<<16) | (nValue>>16); // swap WORDs
41 nRet = ((nRet&0xff00ff)<<8) | ((nRet>>8)&0xff00ff); // swap BYTESs in WORDs
42 return nRet;
43 //return _byteswap_ulong(nValue);
46 UINT64 inline DwordSwapBytes(UINT64 nValue)
48 UINT64 nRet = ((nValue&0xffff0000ffffL)<<16) | ((nValue>>16)&0xffff0000ffffL); // swap WORDs in DWORDs
49 nRet = ((nRet&0xff00ff00ff00ff)<<8) | ((nRet>>8)&0xff00ff00ff00ff); // swap BYTESs in WORDs
50 return nRet;
53 CFileTextLines::CFileTextLines(void)
54 : m_UnicodeType(CFileTextLines::AUTOTYPE)
55 , m_LineEndings(EOL_AUTOLINE)
56 , m_bNeedsConversion(false)
60 CFileTextLines::~CFileTextLines(void)
64 CFileTextLines::UnicodeType CFileTextLines::CheckUnicodeType(LPVOID pBuffer, int cb)
66 if (cb < 2)
67 return CFileTextLines::ASCII;
68 const UINT32 * const pVal32 = (UINT32 *)pBuffer;
69 const UINT16 * const pVal16 = (UINT16 *)pBuffer;
70 const UINT8 * const pVal8 = (UINT8 *)pBuffer;
71 // scan the whole buffer for a 0x00000000 sequence
72 // if found, we assume a binary file
73 int nDwords = cb/4;
74 for (int i=0; i<nDwords; ++i)
76 if (0x00000000 == pVal32[i])
77 return CFileTextLines::BINARY;
79 if (cb >=4 )
81 if (*pVal32 == 0x0000FEFF)
83 return CFileTextLines::UTF32_LE;
85 if (*pVal32 == 0xFFFE0000)
87 return CFileTextLines::UTF32_BE;
90 if (*pVal16 == 0xFEFF)
92 return CFileTextLines::UTF16_LE;
94 if (*pVal16 == 0xFFFE)
96 return CFileTextLines::UTF16_BE;
98 if (cb < 3)
99 return CFileTextLines::ASCII;
100 if (*pVal16 == 0xBBEF)
102 if (pVal8[2] == 0xBF)
103 return CFileTextLines::UTF8BOM;
105 // check for illegal UTF8 sequences
106 bool bNonANSI = false;
107 int nNeedData = 0;
108 int i=0;
109 // run fast for ascii
110 for (; i<cb; i+=8)
112 if ((*(UINT64 *)&pVal8[i] & 0x8080808080808080)!=0) // all Ascii?
114 bNonANSI = true;
115 break;
118 // continue slow
119 for (; i<cb; ++i)
121 UINT8 zChar = pVal8[i];
122 if ((zChar & 0x80)==0) // Ascii
124 if (nNeedData)
126 return CFileTextLines::ASCII;
128 continue;
130 if ((zChar & 0x40)==0) // top bit
132 if (!nNeedData)
133 return CFileTextLines::ASCII;
134 --nNeedData;
136 else if (nNeedData)
138 return CFileTextLines::ASCII;
140 else if ((zChar & 0x20)==0) // top two bits
142 if (zChar<=0xC1)
143 return CFileTextLines::ASCII;
144 nNeedData = 1;
146 else if ((zChar & 0x10)==0) // top three bits
148 nNeedData = 2;
150 else if ((zChar & 0x08)==0) // top four bits
152 if (zChar>=0xf5)
153 return CFileTextLines::ASCII;
154 nNeedData = 3;
156 else
157 return CFileTextLines::ASCII;
159 if (bNonANSI && nNeedData==0)
160 // if get here thru nonAscii and no missing data left then its valid UTF8
161 return CFileTextLines::UTF8;
162 if ((!bNonANSI)&&(DWORD(CRegDWORD(_T("Software\\TortoiseGitMerge\\UseUTF8"), FALSE))))
163 return CFileTextLines::UTF8;
164 return CFileTextLines::ASCII;
168 BOOL CFileTextLines::Load(const CString& sFilePath, int lengthHint /* = 0*/)
170 WCHAR exceptionError[1000] = {0};
171 m_LineEndings = EOL_AUTOLINE;
172 m_UnicodeType = CFileTextLines::AUTOTYPE;
173 RemoveAll();
174 if(lengthHint != 0)
176 Reserve(lengthHint);
179 if (PathIsDirectory(sFilePath))
181 m_sErrorString.Format(IDS_ERR_FILE_NOTAFILE, (LPCTSTR)sFilePath);
182 return FALSE;
185 if (!PathFileExists(sFilePath))
187 //file does not exist, so just return SUCCESS
188 return TRUE;
191 CAutoFile hFile = CreateFile(sFilePath, GENERIC_READ, FILE_SHARE_READ|FILE_SHARE_DELETE|FILE_SHARE_WRITE, NULL, OPEN_EXISTING, NULL, NULL);
192 if (!hFile)
194 SetErrorString();
195 return FALSE;
198 LARGE_INTEGER fsize;
199 if (!GetFileSizeEx(hFile, &fsize))
201 SetErrorString();
202 return FALSE;
204 if (fsize.HighPart)
206 // file is way too big for us
207 m_sErrorString.LoadString(IDS_ERR_FILE_TOOBIG);
208 return FALSE;
211 // create buffer
212 // If new[] was done for type T delete[] must be called on a pointer of type T*,
213 // otherwise the behavior is undefined.
214 // +1 is to address possible truncation when integer division is done
215 CBuffer oFile;
218 oFile.SetLength(fsize.LowPart);
220 catch (CMemoryException* e)
222 e->GetErrorMessage(exceptionError, _countof(exceptionError));
223 m_sErrorString = exceptionError;
224 return FALSE;
227 // load file
228 DWORD dwReadBytes = 0;
229 if (!ReadFile(hFile, (void *)oFile, fsize.LowPart, &dwReadBytes, NULL))
231 SetErrorString();
232 return FALSE;
234 hFile.CloseHandle();
236 // detect type
237 if (m_UnicodeType == CFileTextLines::AUTOTYPE)
239 m_UnicodeType = this->CheckUnicodeType((LPVOID)oFile, dwReadBytes);
240 // enforce conversion for all but ASCII and UTF8 type
241 m_bNeedsConversion = (m_UnicodeType!=CFileTextLines::UTF8)&&(m_UnicodeType!=CFileTextLines::ASCII);
244 // we may have to convert the file content - CString is UTF16LE
247 CBaseFilter * pFilter = NULL;
248 switch (m_UnicodeType)
250 case BINARY:
251 m_sErrorString.Format(IDS_ERR_FILE_BINARY, (LPCTSTR)sFilePath);
252 return FALSE;
253 case UTF8:
254 case UTF8BOM:
255 pFilter = new CUtf8Filter(NULL);
256 break;
257 default:
258 case ASCII:
259 pFilter = new CAsciiFilter(NULL);
260 break;
261 case UTF16_BE:
262 pFilter = new CUtf16beFilter(NULL);
263 break;
264 case UTF16_LE:
265 pFilter = new CUtf16leFilter(NULL);
266 break;
267 case UTF32_BE:
268 pFilter = new CUtf32beFilter(NULL);
269 break;
270 case UTF32_LE:
271 pFilter = new CUtf32leFilter(NULL);
272 break;
274 pFilter->Decode(oFile);
275 delete pFilter;
277 catch (CMemoryException* e)
279 e->GetErrorMessage(exceptionError, _countof(exceptionError));
280 m_sErrorString = exceptionError;
281 return FALSE;
284 int nReadChars=oFile.GetLength()/sizeof(wchar_t);
285 wchar_t * pTextBuf = (wchar_t *)oFile;
286 wchar_t * pLineStart = pTextBuf;
287 if ((m_UnicodeType == UTF8BOM)
288 || (m_UnicodeType == UTF16_LE)
289 || (m_UnicodeType == UTF16_BE)
290 || (m_UnicodeType == UTF32_LE)
291 || (m_UnicodeType == UTF32_BE))
293 // ignore the BOM
294 ++pTextBuf;
295 ++pLineStart;
296 --nReadChars;
299 // fill in the lines into the array
300 size_t countEOLs[EOL__COUNT];
301 memset(countEOLs, 0, sizeof(countEOLs));
302 CFileTextLine oTextLine;
303 for (int i = nReadChars; i; --i)
305 EOL eEol;
306 switch (*pTextBuf++)
308 case '\r':
309 // crlf line ending or cr line ending
310 eEol = ((i > 1) && *(pTextBuf) == '\n') ? EOL_CRLF : EOL_CR;
311 break;
312 case '\n':
313 // lfcr line ending or lf line ending
314 eEol = ((i > 1) && *(pTextBuf) == '\r') ? EOL_LFCR : EOL_LF;
315 break;
316 case 0x000b:
317 eEol = EOL_VT;
318 break;
319 case 0x000c:
320 eEol = EOL_FF;
321 break;
322 case 0x0085:
323 eEol = EOL_NEL;
324 break;
325 case 0x2028:
326 eEol = EOL_LS;
327 break;
328 case 0x2029:
329 eEol = EOL_PS;
330 break;
331 default:
332 continue;
334 oTextLine.sLine = CString(pLineStart, (int)(pTextBuf-pLineStart)-1);
335 oTextLine.eEnding = eEol;
336 Add(oTextLine);
337 ++countEOLs[eEol];
338 if (eEol==EOL_CRLF || eEol==EOL_LFCR)
340 ++pTextBuf;
341 --i;
343 pLineStart = pTextBuf;
345 CString line(pLineStart, (int)(pTextBuf-pLineStart));
346 Add(line, EOL_NOENDING);
348 // some EOLs are not supported by the svn diff lib.
349 m_bNeedsConversion |= (countEOLs[EOL_CRLF]!=0);
350 m_bNeedsConversion |= (countEOLs[EOL_FF]!=0);
351 m_bNeedsConversion |= (countEOLs[EOL_VT]!=0);
352 m_bNeedsConversion |= (countEOLs[EOL_NEL]!=0);
353 m_bNeedsConversion |= (countEOLs[EOL_LS]!=0);
354 m_bNeedsConversion |= (countEOLs[EOL_PS]!=0);
356 size_t eolmax = 0;
357 for (int nEol = 0; nEol<EOL__COUNT; nEol++)
359 if (eolmax < countEOLs[nEol])
361 eolmax = countEOLs[nEol];
362 m_LineEndings = (EOL)nEol;
366 return TRUE;
369 void CFileTextLines::StripWhiteSpace(CString& sLine, DWORD dwIgnoreWhitespaces, bool blame)
371 if (blame)
373 if (sLine.GetLength() > 66)
374 sLine = sLine.Mid(66);
376 switch (dwIgnoreWhitespaces)
378 case 0:
379 // Compare whitespaces
380 // do nothing
381 break;
382 case 1:
383 // Ignore all whitespaces
384 sLine.TrimLeft(_T(" \t"));
385 sLine.TrimRight(_T(" \t"));
386 break;
387 case 2:
388 // Ignore leading whitespace
389 sLine.TrimLeft(_T(" \t"));
390 break;
391 case 3:
392 // Ignore ending whitespace
393 sLine.TrimRight(_T(" \t"));
394 break;
399 Encoding pattern:
400 - encode & save BOM
401 - Get Line
402 - modify line - whitespaces, lowercase
403 - encode & save line
404 - get cached encoded eol
405 - save eol
407 BOOL CFileTextLines::Save(const CString& sFilePath
408 , bool bSaveAsUTF8 /*= false*/
409 , bool bUseSVNCompatibleEOLs /*= false*/
410 , DWORD dwIgnoreWhitespaces /*=0*/
411 , BOOL bIgnoreCase /*= FALSE*/
412 , bool bBlame /*= false*/) const
416 CString destPath = sFilePath;
417 // now make sure that the destination directory exists
418 int ind = 0;
419 while (destPath.Find('\\', ind)>=2)
421 if (!PathIsDirectory(destPath.Left(destPath.Find('\\', ind))))
423 if (!CreateDirectory(destPath.Left(destPath.Find('\\', ind)), NULL))
424 return FALSE;
426 ind = destPath.Find('\\', ind)+1;
429 CStdioFile file; // Hugely faster than CFile for big file writes - because it uses buffering
430 if (!file.Open(sFilePath, CFile::modeCreate | CFile::modeWrite | CFile::typeBinary))
432 const_cast<CString *>(&m_sErrorString)->Format(IDS_ERR_FILE_OPEN, (LPCTSTR)sFilePath);
433 return FALSE;
436 CBaseFilter * pFilter = NULL;
437 bool bSaveBom = true;
438 CFileTextLines::UnicodeType eUnicodeType = bSaveAsUTF8 ? CFileTextLines::UTF8 : m_UnicodeType;
439 switch (eUnicodeType)
441 default:
442 case CFileTextLines::ASCII:
443 bSaveBom = false;
444 pFilter = new CAsciiFilter(&file);
445 break;
446 case CFileTextLines::UTF8:
447 bSaveBom = false;
448 case CFileTextLines::UTF8BOM:
449 pFilter = new CUtf8Filter(&file);
450 break;
451 case CFileTextLines::UTF16_BE:
452 pFilter = new CUtf16beFilter(&file);
453 break;
454 case CFileTextLines::UTF16_LE:
455 pFilter = new CUtf16leFilter(&file);
456 break;
457 case CFileTextLines::UTF32_BE:
458 pFilter = new CUtf32beFilter(&file);
459 break;
460 case CFileTextLines::UTF32_LE:
461 pFilter = new CUtf32leFilter(&file);
462 break;
465 if (bSaveBom)
467 //first write the BOM
468 pFilter->Write(L"\xfeff");
470 // cache EOLs
471 CBuffer oEncodedEol[EOL__COUNT];
472 oEncodedEol[EOL_LF] = pFilter->Encode(_T("\n")); // x0a
473 oEncodedEol[EOL_CR] = pFilter->Encode(_T("\r")); // x0d
474 oEncodedEol[EOL_CRLF] = pFilter->Encode(_T("\r\n")); // x0d x0a
475 if (bUseSVNCompatibleEOLs)
477 // when using EOLs that are supported by the svn lib,
478 // we have to use the same EOLs as the file has in case
479 // they're already supported, but a different supported one
480 // in case the original one isn't supported.
481 // Only this way the option "ignore EOLs (recommended)" unchecked
482 // actually shows the lines as different.
483 // However, the diff won't find and differences in EOLs
484 // for these special EOLs if they differ between those special ones
485 // listed below.
486 // But it will work properly for the most common EOLs LF/CR/CRLF.
487 oEncodedEol[EOL_LFCR] = oEncodedEol[EOL_CR];
488 for (int nEol = 0; nEol<EOL_NOENDING; nEol++)
490 if (oEncodedEol[nEol].IsEmpty())
491 oEncodedEol[nEol] = oEncodedEol[EOL_LF];
494 else
496 oEncodedEol[EOL_LFCR] = pFilter->Encode(_T("\n\r"));
497 oEncodedEol[EOL_VT] = pFilter->Encode(_T("\v")); // x0b
498 oEncodedEol[EOL_FF] = pFilter->Encode(_T("\f")); // x0c
499 oEncodedEol[EOL_NEL] = pFilter->Encode(_T("\x85"));
500 oEncodedEol[EOL_LS] = pFilter->Encode(_T("\x2028"));
501 oEncodedEol[EOL_PS] = pFilter->Encode(_T("\x2029"));
503 oEncodedEol[EOL_AUTOLINE] = oEncodedEol[m_LineEndings==EOL_AUTOLINE ? EOL_CRLF : m_LineEndings];
505 for (int i=0; i<GetCount(); i++)
507 CString sLineT = GetAt(i);
508 StripWhiteSpace(sLineT, dwIgnoreWhitespaces, bBlame);
509 if (bIgnoreCase)
510 sLineT = sLineT.MakeLower();
511 pFilter->Write(sLineT);
512 EOL eEol = GetLineEnding(i);
513 pFilter->Write(oEncodedEol[eEol]);
515 delete pFilter;
516 file.Close();
518 catch (CException * e)
520 CString * psErrorString = const_cast<CString *>(&m_sErrorString);
521 e->GetErrorMessage(psErrorString->GetBuffer(4096), 4096);
522 psErrorString->ReleaseBuffer();
523 e->Delete();
524 return FALSE;
526 return TRUE;
529 void CFileTextLines::SetErrorString()
531 m_sErrorString = CFormatMessageWrapper();
534 void CFileTextLines::CopySettings(CFileTextLines * pFileToCopySettingsTo)
536 if (pFileToCopySettingsTo)
538 pFileToCopySettingsTo->m_UnicodeType = m_UnicodeType;
539 pFileToCopySettingsTo->m_LineEndings = m_LineEndings;
545 void CBuffer::ExpandToAtLeast(int nNewSize)
547 if (nNewSize>m_nAllocated)
549 delete [] m_pBuffer; // we don't preserve buffer content intentionally
550 nNewSize+=2048-1;
551 nNewSize&=~(1024-1);
552 m_pBuffer=new BYTE[nNewSize];
553 m_nAllocated=nNewSize;
557 void CBuffer::SetLength(int nUsed)
559 ExpandToAtLeast(nUsed);
560 m_nUsed = nUsed;
563 void CBuffer::Swap(CBuffer & Src)
565 std::swap(Src.m_nAllocated, m_nAllocated);
566 std::swap(Src.m_pBuffer, m_pBuffer);
567 std::swap(Src.m_nUsed, m_nUsed);
570 void CBuffer::Copy(const CBuffer & Src)
572 if (&Src != this)
574 SetLength(Src.m_nUsed);
575 memcpy(m_pBuffer, Src.m_pBuffer, m_nUsed);
581 bool CBaseFilter::Decode(/*in out*/ CBuffer & data)
583 int nFlags = (m_nCodePage==CP_ACP) ? MB_PRECOMPOSED : 0;
584 // dry decode is around 8 times faster then real one, alternatively we can set buffer to max length
585 int nReadChars = MultiByteToWideChar(m_nCodePage, nFlags, (LPCSTR)data, data.GetLength(), NULL, 0);
586 m_oBuffer.SetLength(nReadChars*sizeof(wchar_t));
587 int ret2 = MultiByteToWideChar(m_nCodePage, nFlags, (LPCSTR)data, data.GetLength(), (LPWSTR)(void *)m_oBuffer, nReadChars);
588 if (ret2 != nReadChars)
590 return FALSE;
592 data.Swap(m_oBuffer);
593 return TRUE;
596 const CBuffer & CBaseFilter::Encode(const CString s)
598 m_oBuffer.SetLength(s.GetLength()*3+1); // set buffer to guessed max size
599 int nConvertedLen = WideCharToMultiByte(m_nCodePage, 0, (LPCTSTR)s, s.GetLength(), (LPSTR)m_oBuffer, m_oBuffer.GetLength(), NULL, NULL);
600 m_oBuffer.SetLength(nConvertedLen); // set buffer to used size
601 return m_oBuffer;
606 bool CUtf16leFilter::Decode(/*in out*/ CBuffer & /*data*/)
608 // we believe data is ok for use
609 return TRUE;
612 const CBuffer & CUtf16leFilter::Encode(const CString s)
614 int nNeedBytes = s.GetLength()*sizeof(TCHAR);
615 m_oBuffer.SetLength(nNeedBytes);
616 memcpy((void *)m_oBuffer, (LPCTSTR)s, nNeedBytes);
617 return m_oBuffer;
622 bool CUtf16beFilter::Decode(/*in out*/ CBuffer & data)
624 int nNeedBytes = data.GetLength();
625 // make in place WORD BYTEs swap
626 UINT64 * p_qw = (UINT64 *)(void *)data;
627 int nQwords = nNeedBytes/8;
628 for (int nQword = 0; nQword<nQwords; nQword++)
630 p_qw[nQword] = WordSwapBytes(p_qw[nQword]);
632 wchar_t * p_w = (wchar_t *)p_qw;
633 int nWords = nNeedBytes/2;
634 for (int nWord = nQwords*4; nWord<nWords; nWord++)
636 p_w[nWord] = WideCharSwap(p_w[nWord]);
638 return CUtf16leFilter::Decode(data);
641 const CBuffer & CUtf16beFilter::Encode(const CString s)
643 int nNeedBytes = s.GetLength()*sizeof(TCHAR);
644 m_oBuffer.SetLength(nNeedBytes);
645 // copy swaping BYTE order in WORDs
646 const UINT64 * p_qwIn = (const UINT64 *)(LPCTSTR)s;
647 UINT64 * p_qwOut = (UINT64 *)(void *)m_oBuffer;
648 int nQwords = nNeedBytes/8;
649 for (int nQword = 0; nQword<nQwords; nQword++)
651 p_qwOut[nQword] = WordSwapBytes(p_qwIn[nQword]);
653 wchar_t * p_wIn = (wchar_t *)p_qwIn;
654 wchar_t * p_wOut = (wchar_t *)p_qwOut;
655 int nWords = nNeedBytes/2;
656 for (int nWord = nQwords*4; nWord<nWords; nWord++)
658 p_wOut[nWord] = WideCharSwap(p_wIn[nWord]);
660 return m_oBuffer;
665 bool CUtf32leFilter::Decode(/*in out*/ CBuffer & data)
667 // UTF32 have four bytes per char
668 int nReadChars = data.GetLength()/4;
669 UINT32 * p32 = (UINT32 *)(void *)data;
671 // count chars which needs surrogate pair
672 int nSurrogatePairCount = 0;
673 for (int i = 0; i<nReadChars; ++i)
675 if (p32[i]<0x110000 && p32[i]>=0x10000)
677 ++nSurrogatePairCount;
681 // fill buffer
682 m_oBuffer.SetLength((nReadChars+nSurrogatePairCount)*sizeof(wchar_t));
683 wchar_t * pOut = (wchar_t *)m_oBuffer;
684 for (int i = 0; i<nReadChars; ++i, ++pOut)
686 UINT32 zChar = p32[i];
687 if (zChar>=0x110000)
689 *pOut=0xfffd; // ? mark
691 else if (zChar>=0x10000)
693 zChar-=0x10000;
694 pOut[0] = ((zChar>>10)&0x3ff) | 0xd800; // lead surrogate
695 pOut[1] = (zChar&0x7ff) | 0xdc00; // trail surrogate
696 pOut++;
698 else
700 *pOut = (wchar_t)zChar;
703 data.Swap(m_oBuffer);
704 return TRUE;
707 const CBuffer & CUtf32leFilter::Encode(const CString s)
709 int nInWords = s.GetLength();
710 m_oBuffer.SetLength(nInWords*2);
712 LPCTSTR p_In = (LPCTSTR)s;
713 UINT32 * p_Out = (UINT32 *)(void *)m_oBuffer;
714 int nOutDword = 0;
715 for (int nInWord = 0; nInWord<nInWords; nInWord++, nOutDword++)
717 UINT32 zChar = p_In[nInWord];
718 if ((zChar&0xfc00) == 0xd800) // lead surrogate
720 if (nInWord+1<nInWords && (p_In[nInWord+1]&0xfc00) == 0xdc00) // trail surrogate follows
722 zChar = 0x10000 + ((zChar&0x3ff)<<10) + (p_In[++nInWord]&0x3ff);
724 else
726 zChar = 0xfffd; // ? mark
729 else if ((zChar&0xfc00) == 0xdc00) // trail surrogate without lead
731 zChar = 0xfffd; // ? mark
733 p_Out[nOutDword] = zChar;
735 m_oBuffer.SetLength(nOutDword*4); // store length reduced by surrogates
736 return m_oBuffer;
741 bool CUtf32beFilter::Decode(/*in out*/ CBuffer & data)
744 // swap BYTEs order in DWORDs
745 UINT64 * p64 = (UINT64 *)(void *)data;
746 int nQwords = data.GetLength()/8;
747 for (int nQword = 0; nQword<nQwords; nQword++)
749 p64[nQword] = DwordSwapBytes(p64[nQword]);
752 UINT32 * p32 = (UINT32 *)p64;
753 int nDwords = data.GetLength()/4;
754 for (int nDword = nQwords*2; nDword<nDwords; nDword++)
756 p32[nDword] = DwordSwapBytes(p32[nDword]);
758 return CUtf32leFilter::Decode(data);
761 const CBuffer & CUtf32beFilter::Encode(const CString s)
763 CUtf32leFilter::Encode(s);
765 // swap BYTEs order in DWORDs
766 UINT64 * p64 = (UINT64 *)(void *)m_oBuffer;
767 int nQwords = m_oBuffer.GetLength()/8;
768 for (int nQword = 0; nQword<nQwords; nQword++)
770 p64[nQword] = DwordSwapBytes(p64[nQword]);
773 UINT32 * p32 = (UINT32 *)p64;
774 int nDwords = m_oBuffer.GetLength()/4;
775 for (int nDword = nQwords*2; nDword<nDwords; nDword++)
777 p32[nDword] = DwordSwapBytes(p32[nDword]);
779 return m_oBuffer;