1
// TortoiseGitMerge - a Diff/Patch program
3 // Copyright (C) 2023 - TortoiseGit
4 // Copyright (C) 2006-2007, 2012-2016, 2019, 2023 - TortoiseSVN
6 // This program is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU General Public License
8 // as published by the Free Software Foundation; either version 2
9 // of the License, or (at your option) any later version.
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with this program; if not, write to the Free Software Foundation,
18 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 // A template class to make an array which looks like a CStringArray or CDWORDArray but
27 // is in fact based on a STL vector, which is much faster at large sizes
/**
 * Thin wrapper around std::vector<T> exposing an MFC-array-like interface
 * (GetCount/GetAt/SetAt/InsertAt/RemoveAt/Add/RemoveAll).
 *
 * Indices are plain ints and are NOT range checked: GetAt/SetAt use
 * operator[] and InsertAt/RemoveAt use begin()+index, so callers must pass
 * valid positions.
 */
template <typename T> class CStdArrayV
{
public:
	/// Number of stored elements, narrowed to int.
	int GetCount() const { return static_cast<int>(m_vec.size()); }

	/// Read-only access to the element at \p index (unchecked).
	const T& GetAt(int index) const { return m_vec[index]; }

	/// Erases the element at \p index (unchecked).
	void RemoveAt(int index) { m_vec.erase(m_vec.begin() + index); }

	/// Inserts a copy of \p strVal before position \p index.
	void InsertAt(int index, const T& strVal) { m_vec.insert(m_vec.begin() + index, strVal); }

	/// Inserts \p nCopies copies of \p strVal before position \p index.
	/// A non-positive \p nCopies inserts nothing (a negative value would
	/// otherwise be converted to a huge unsigned count).
	void InsertAt(int index, const T& strVal, int nCopies)
	{
		if (nCopies <= 0)
			return;
		m_vec.insert(m_vec.begin() + index, static_cast<typename std::vector<T>::size_type>(nCopies), strVal);
	}

	/// Overwrites the element at \p index (unchecked).
	void SetAt(int index, const T& strVal) { m_vec[index] = strVal; }

	/// Appends a copy of \p strVal. When the vector is full the capacity is
	/// doubled (or set to 256 for an empty vector) before appending.
	void Add(const T& strVal)
	{
		if (m_vec.size() == m_vec.capacity())
		{
			// strVal may refer to an element of m_vec; reserve() would
			// invalidate that reference, so take a copy before growing.
			const T held(strVal);
			m_vec.reserve(m_vec.capacity() ? m_vec.capacity() * 2 : 256);
			m_vec.push_back(held);
			return;
		}
		m_vec.push_back(strVal);
	}

	/// Removes all elements.
	void RemoveAll() { m_vec.clear(); }

	/// Pre-allocates room for \p nHintSize elements. Zero or negative hints
	/// are ignored (a negative int would convert to an enormous size_type and
	/// make reserve() throw).
	void Reserve(int nHintSize)
	{
		if (nHintSize > 0)
			m_vec.reserve(static_cast<typename std::vector<T>::size_type>(nHintSize));
	}

private:
	std::vector<T> m_vec;
};
50 // A template class to make an array which looks like a CStringArray or CDWORDArray but
51 // is in fact based on a STL deque, which is much faster at large sizes
/**
 * Thin wrapper around std::deque<T> exposing an MFC-array-like interface
 * (GetCount/GetAt/SetAt/InsertAt/RemoveAt/Add/RemoveAll).
 *
 * Indices are plain ints and are NOT range checked: GetAt/SetAt use
 * operator[] and InsertAt/RemoveAt use begin()+index, so callers must pass
 * valid positions.
 */
template <typename T> class CStdArrayD
{
public:
	/// Number of stored elements, narrowed to int.
	int GetCount() const { return static_cast<int>(m_vec.size()); }

	/// Read-only access to the element at \p index (unchecked).
	const T& GetAt(int index) const { return m_vec[index]; }

	/// Erases the element at \p index (unchecked).
	void RemoveAt(int index) { m_vec.erase(m_vec.begin() + index); }

	/// Inserts a copy of \p strVal before position \p index.
	void InsertAt(int index, const T& strVal) { m_vec.insert(m_vec.begin() + index, strVal); }

	/// Inserts \p nCopies copies of \p strVal before position \p index.
	/// A non-positive \p nCopies inserts nothing (a negative value would
	/// otherwise be converted to a huge unsigned count).
	void InsertAt(int index, const T& strVal, int nCopies)
	{
		if (nCopies <= 0)
			return;
		m_vec.insert(m_vec.begin() + index, static_cast<typename std::deque<T>::size_type>(nCopies), strVal);
	}

	/// Overwrites the element at \p index (unchecked).
	void SetAt(int index, const T& strVal) { m_vec[index] = strVal; }

	/// Appends a copy of \p strVal.
	void Add(const T& strVal) { m_vec.push_back(strVal); }

	/// Removes all elements.
	void RemoveAll() { m_vec.clear(); }

	/// Accepted for interface parity with CStdArrayV; std::deque has no
	/// reserve(), so the hint is ignored.
	void Reserve(int /*nHintSize*/) {}

private:
	std::deque<T> m_vec;
};
// Alias for the vector-backed array template instantiated with DWORD elements.
68 using CStdDWORDArray
= CStdArrayV
<DWORD
>;
70 struct CFileTextLine
{
72 EOL eEnding
= EOL::AutoLine
;
// Alias for the deque-backed array template instantiated with CFileTextLine elements;
// CFileTextLines derives from it.
74 using CStdFileLineArray
= CStdArrayD
<CFileTextLine
>;
76 * \ingroup TortoiseMerge
78 * Represents an array of text lines which are read from a file.
79 * This class is also responsible for determining the encoding of
80 * the file (e.g. UNICODE(UTF16), UTF8, ASCII, ...).
82 class CFileTextLines
: public CStdFileLineArray
88 enum class UnicodeType
100 UTF8BOM
, //=UTF8+65536,
// NOTE(review): the enclosing struct header is not visible here; these are presumably the
// members of SaveParams, since they are read as m_SaveParams.m_UnicodeType / .m_LineEndings.
// Encoding setting; defaults to UnicodeType::AUTOTYPE.
104 UnicodeType m_UnicodeType
= CFileTextLines::UnicodeType::AUTOTYPE
;
// Line-ending setting; defaults to EOL::AutoLine.
105 EOL m_LineEndings
= EOL::AutoLine
;
109 * Loads the text file and adds each line to the array
110 * \param sFilePath the path to the file
111 * \param lengthHint hint to create line array
// Declaration only; the definition is not part of this section.
// lengthHint is optional and defaults to 0.
113 BOOL
Load(const CString
& sFilePath
, int lengthHint
= 0);
115 * Saves the whole array of text lines to a file, preserving
116 * the line endings detected at Load()
117 * \param sFilePath the path to save the file to
118 * \param bSaveAsUTF8 enforce encoding for save
119 * \param bUseSVNCompatibleEOLs limit EOLs to CRLF, CR and LF, last one is used instead of all others
120 * \param dwIgnoreWhitespaces "enum" mode of removing whitespaces
121 * \param bIgnoreCase converts whole file to lower case
122 * \param bBlame limit line length
// Declaration only; the definition is not part of this section.
// Every parameter after sFilePath has a default, so Save(path) alone is a valid call.
// NOTE(review): bIgnoreComments, linestart, blockstart, blockend, rx and replacement have no
// \param entry. Presumably linestart/blockstart/blockend are comment delimiters and
// rx/replacement a per-line regex substitution (cf. StripComments and LineRegex) - confirm
// against the implementation.
124 BOOL
Save(const CString
& sFilePath
125 , bool bSaveAsUTF8
= false
126 , bool bUseSVNCompatibleEOLs
= false
127 , DWORD dwIgnoreWhitespaces
= 0
128 , BOOL bIgnoreCase
= FALSE
129 , bool bBlame
= false
130 , bool bIgnoreComments
= false
131 , const CString
& linestart
= CString()
132 , const CString
& blockstart
= CString()
133 , const CString
& blockend
= CString()
134 , const std::wregex
& rx
= std::wregex()
135 , const std::wstring
& replacement
= L
"");
137 * Returns an error string of the last failed operation
139 CString
GetErrorString() const {return m_sErrorString
;}
141 * Copies the settings of a file like the line ending styles
142 * to another CFileTextLines object.
// Declaration only; const member, so this object is the source of the settings and
// pFileToCopySettingsTo the destination.
// NOTE(review): whether a null pFileToCopySettingsTo is tolerated cannot be seen from here.
144 void CopySettings(CFileTextLines
* pFileToCopySettingsTo
) const;
146 bool NeedsConversion() const { return m_bNeedsConversion
; }
147 UnicodeType
GetUnicodeType() const {return m_SaveParams
.m_UnicodeType
;}
148 EOL
GetLineEndings() const {return m_SaveParams
.m_LineEndings
;}
150 void Add(const CString
& sLine
, EOL ending
) { CFileTextLine temp
={sLine
, ending
}; CStdFileLineArray::Add(temp
); }
151 void InsertAt(int index
, const CString
& strVal
, EOL ending
) { CFileTextLine temp
={strVal
, ending
}; CStdFileLineArray::InsertAt(index
, temp
); }
153 const CString
& GetAt(int index
) const { return CStdFileLineArray::GetAt(index
).sLine
; }
154 EOL
GetLineEnding(int index
) const { return CStdFileLineArray::GetAt(index
).eEnding
; }
155 void SetSaveParams(const SaveParams
& sp
) { m_SaveParams
= sp
; }
156 SaveParams
GetSaveParams() const { return m_SaveParams
; }
157 void KeepEncoding(bool bKeep
= true) { m_bKeepEncoding
= bKeep
; }
158 //void SetLineEnding(int index, EOL ending) { CStdFileLineArray::GetAt(index).eEnding = ending; }
// Static; takes a UnicodeType and returns a wide C string.
// NOTE(review): presumably a display name for the encoding with static lifetime - confirm
// ownership of the returned pointer against the implementation.
160 static const wchar_t * GetEncodingName(UnicodeType
);
163 * Checks the Unicode type in a text buffer
164 * Must be public for TortoiseGitBlame
165 * \param pBuffer pointer to the buffer containing text
166 * \param cb size of the text buffer in bytes
// Declaration only. Non-static and non-const.
// NOTE(review): whether it modifies object state cannot be seen from this section.
168 UnicodeType
CheckUnicodeType(LPCVOID pBuffer
, int cb
);
// Declaration only. NOTE(review): presumably fills m_sErrorString (the value returned by
// GetErrorString) - the source of the message is not visible here.
171 void SetErrorString();
// Static helper; sLine is passed by non-const reference, so it is modified in place.
// NOTE(review): dwIgnoreWhitespaces / blame presumably mirror the Save() parameters
// dwIgnoreWhitespaces / bBlame - confirm.
173 static void StripWhiteSpace(CString
& sLine
, DWORD dwIgnoreWhitespaces
, bool blame
);
// sLine is passed by non-const reference, so it is modified in place.
// NOTE(review): the bool result presumably reports whether the line ends inside a block
// comment, to be fed back as bInBlockComment for the next line - confirm.
174 bool StripComments(CString
& sLine
, bool bInBlockComment
);
// Declaration only; sLine is read-only here.
// NOTE(review): presumably tells whether character position pos of sLine lies inside a
// string literal - confirm against the implementation.
175 bool IsInsideString(const CString
& sLine
, int pos
);
// Const member; sLine is passed by non-const reference, so it is modified in place.
// NOTE(review): presumably replaces matches of rx in sLine with replacement - confirm.
176 void LineRegex(CString
& sLine
, const std::wregex
& rx
, const std::wstring
& replacement
) const;
// Error text returned by GetErrorString().
180 CString m_sErrorString
;
// Flag returned by NeedsConversion(); starts out false.
181 bool m_bNeedsConversion
= false;
// Flag set through KeepEncoding(); starts out false.
182 bool m_bKeepEncoding
= false;
// Encoding / line-ending settings exposed via Get/SetSaveParams, GetUnicodeType, GetLineEndings.
183 SaveParams m_SaveParams
;
// NOTE(review): presumably the comment delimiters used by StripComments (cf. the
// linestart/blockstart/blockend parameters of Save) - confirm.
184 CString m_sCommentLine
;
185 CString m_sCommentBlockStart
;
186 CString m_sCommentBlockEnd
;
195 CBuffer(const CBuffer
& Src
) { Copy(Src
); }
196 CBuffer(const CBuffer
* const Src
) { Copy(*Src
); }
197 ~CBuffer() {Free(); }
199 CBuffer
& operator=(const CBuffer
& Src
) { Copy(Src
); return *this; }
200 operator bool () const { return !IsEmpty(); }
201 operator void*() const { return static_cast<void*>(m_pBuffer
); }
202 operator LPSTR() const { return reinterpret_cast<LPSTR
>(m_pBuffer
); }
204 void Clear() { m_nUsed
=0; }
// Declaration only. NOTE(review): presumably grows the allocation to hold at least nNewSize
// bytes; whether existing content is preserved cannot be seen from here - confirm.
205 void ExpandToAtLeast(int nNewSize
);
206 int GetLength() const { return m_nUsed
; }
207 bool IsEmpty() const { return GetLength()==0; }
// Declaration only. NOTE(review): presumably sets the used length reported by GetLength()
// to nUsed, growing the allocation if required - confirm against the implementation.
208 void SetLength(int nUsed
);
// Declaration only; declared noexcept. Src is a non-const reference, so both objects may be
// modified. NOTE(review): presumably exchanges the contents of this buffer and Src - confirm.
209 void Swap(CBuffer
& Src
) noexcept
;
// Declaration only; called by both constructors and by operator=.
// NOTE(review): self-copy handling (Copy(*this)) cannot be seen from here - confirm.
212 void Copy(const CBuffer
& Src
);
213 void Free() { delete [] m_pBuffer
; }
// Byte array released with delete[] in Free(); null until something allocates it.
215 BYTE
* m_pBuffer
= nullptr;
// NOTE(review): presumably the allocated size of m_pBuffer in bytes - confirm.
217 int m_nAllocated
= 0;
// Default-constructible; copy construction and copy assignment are deleted.
223 CDecodeFilter() = default;
224 CDecodeFilter(const CDecodeFilter
&) = delete;
225 CDecodeFilter
& operator=(const CDecodeFilter
& Src
) = delete;
226 virtual ~CDecodeFilter()
228 m_deleter(m_pBuffer
);
// Pure virtual. The input bytes are passed as a std::unique_ptr by value, so the callee
// takes ownership of them; len is passed alongside.
// NOTE(review): presumably len is the byte count and the result lands in
// m_pBuffer / m_iBufferLength (see GetStringView) - confirm against the overrides.
231 virtual bool Decode(std::unique_ptr
<BYTE
[]> s
, int len
) = 0;
// Returns a view over m_iBufferLength wide characters starting at m_pBuffer. The view does
// not own the characters.
// NOTE(review): the statement executed when m_iBufferLength == 0 is not visible in this
// section; presumably an empty view is returned - confirm.
232 std::wstring_view
GetStringView() const
234 if (m_iBufferLength
== 0)
236 return std::wstring_view(m_pBuffer
, m_iBufferLength
);
// Wide-character buffer exposed by GetStringView(); handed to m_deleter in the destructor.
240 wchar_t* m_pBuffer
= nullptr;
// Element count passed to std::wstring_view in GetStringView().
241 int m_iBufferLength
= 0;
// Callable used by the destructor to release m_pBuffer; the default treats the pointer as a
// wchar_t array and delete[]s it.
242 std::function
<void(void*)> m_deleter
= [](void* ptr
) { delete[] static_cast<wchar_t*>(ptr
); };
// Constructor taking the file to write to. Not marked explicit.
// NOTE(review): the initializer list and body are not visible in this section; presumably
// p_File is stored in m_pFile - confirm.
248 CEncodeFilter(CStdioFile
* p_File
)
// Virtual destructor. NOTE(review): body not visible in this section.
252 virtual ~CEncodeFilter()
// Pure virtual. Returns a reference to a CBuffer rather than a value, so the result must
// outlive the call. NOTE(review): presumably the internal m_oBuffer, overwritten by the next
// Encode() - confirm against the overrides.
256 virtual const CBuffer
& Encode(const CString
& data
) = 0;
257 const CBuffer
& GetBuffer() const {return m_oBuffer
; }
258 void Write(const CString
& s
) { Write(Encode(s
)); } ///< encode into buffer and write
259 void Write() { Write(m_oBuffer
); } ///< write preencoded internal buffer
260 void Write(const CBuffer
& buffer
) { if (buffer
.GetLength()) m_pFile
->Write(static_cast<void*>(buffer
), buffer
.GetLength()); } ///< write preencoded buffer
// File the Write() overloads write to. Dereferenced without a null check in
// Write(const CBuffer&). NOTE(review): ownership is not visible here; presumably not owned.
266 CStdioFile
* m_pFile
;
// Filter deriving from both CEncodeFilter and CDecodeFilter; base class of CUtf8Filter.
270 class CAsciiFilter
: public CEncodeFilter
, public CDecodeFilter
// Forwards pFile to CEncodeFilter and initializes m_nCodePage with CP_ACP.
// NOTE(review): constructor body not visible in this section.
273 CAsciiFilter(CStdioFile
* pFile
)
274 : CEncodeFilter(pFile
)
275 , m_nCodePage(CP_ACP
)
// Override of CDecodeFilter::Decode; defined elsewhere.
278 bool Decode(std::unique_ptr
<BYTE
[]> data
, int len
) override
;
// Override of CEncodeFilter::Encode; defined elsewhere.
279 const CBuffer
& Encode(const CString
& data
) override
;
283 Code page for MultiByteToWideChar.
// CAsciiFilter variant that differs only in the code page: no Decode/Encode overrides are
// visible in this section, so the CAsciiFilter ones are inherited.
289 class CUtf8Filter
: public CAsciiFilter
// Runs the CAsciiFilter constructor (which sets m_nCodePage to CP_ACP) and then overwrites
// m_nCodePage with CP_UTF8.
292 CUtf8Filter(CStdioFile
* pFile
)
293 : CAsciiFilter(pFile
)
295 m_nCodePage
= CP_UTF8
;
// Filter deriving from both CEncodeFilter and CDecodeFilter; base class of CUtf16beFilter.
// NOTE(review): going by the name, handles UTF-16 little-endian text - confirm.
300 class CUtf16leFilter
: public CEncodeFilter
, public CDecodeFilter
// Forwards pFile to CEncodeFilter. NOTE(review): constructor body not visible in this section.
303 CUtf16leFilter(CStdioFile
* pFile
)
304 : CEncodeFilter(pFile
)
// Override of CDecodeFilter::Decode; defined elsewhere.
307 bool Decode(std::unique_ptr
<BYTE
[]> data
, int len
) override
;
// Override of CEncodeFilter::Encode; defined elsewhere.
308 const CBuffer
& Encode(const CString
& s
) override
;
// Specialization of CUtf16leFilter that overrides both Decode and Encode.
// NOTE(review): going by the name, handles UTF-16 big-endian text - confirm.
312 class CUtf16beFilter
: public CUtf16leFilter
// Forwards pFile to CUtf16leFilter; empty body.
315 CUtf16beFilter(CStdioFile
*pFile
) : CUtf16leFilter(pFile
){}
// Override of Decode; defined elsewhere.
317 bool Decode(std::unique_ptr
<BYTE
[]> data
, int len
) override
;
// Override of Encode; defined elsewhere.
318 const CBuffer
& Encode(const CString
& s
) override
;
// Filter deriving from both CEncodeFilter and CDecodeFilter; base class of CUtf32beFilter.
// NOTE(review): going by the name, handles UTF-32 little-endian text - confirm.
322 class CUtf32leFilter
: public CEncodeFilter
, public CDecodeFilter
// Forwards pFile to CEncodeFilter. NOTE(review): constructor body not visible in this section.
325 CUtf32leFilter(CStdioFile
* pFile
)
326 : CEncodeFilter(pFile
)
// Override of CDecodeFilter::Decode; defined elsewhere.
329 bool Decode(std::unique_ptr
<BYTE
[]> data
, int len
) override
;
// Override of CEncodeFilter::Encode; defined elsewhere.
330 const CBuffer
& Encode(const CString
& s
) override
;
// Specialization of CUtf32leFilter that overrides both Decode and Encode.
// NOTE(review): going by the name, handles UTF-32 big-endian text - confirm.
334 class CUtf32beFilter
: public CUtf32leFilter
// Forwards pFile to CUtf32leFilter; empty body.
337 CUtf32beFilter(CStdioFile
*pFile
) : CUtf32leFilter(pFile
){}
// Override of Decode; defined elsewhere.
339 bool Decode(std::unique_ptr
<BYTE
[]> data
, int len
) override
;
// Override of Encode; defined elsewhere.
340 const CBuffer
& Encode(const CString
& s
) override
;