Updated libgit to version 2.46.2 based on Git for Windows sources
[TortoiseGit.git] / src / TortoiseMerge / FileTextLines.h
blobec857cb501cd1331aeb0b4e9d5dcf6ef6f405f6a
1 // TortoiseGitMerge - a Diff/Patch program
3 // Copyright (C) 2023 - TortoiseGit
4 // Copyright (C) 2006-2007, 2012-2016, 2019, 2023 - TortoiseSVN
6 // This program is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU General Public License
8 // as published by the Free Software Foundation; either version 2
9 // of the License, or (at your option) any later version.
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with this program; if not, write to the Free Software Foundation,
18 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 #pragma once
21 #include "EOL.h"
22 #include <deque>
23 #include <regex>
24 #include <functional>
// A template class to make an array which looks like a CStringArray or CDWORDArray but
// is in fact based on an STL vector, which is much faster at large sizes.
//
// Indices are passed straight through to the vector and are NOT range-checked;
// callers must keep them inside [0, GetCount()) (or [0, GetCount()] for InsertAt).
template <typename T> class CStdArrayV
{
public:
	// Number of stored elements, narrowed to int to mirror the MFC array interface.
	int GetCount() const { return static_cast<int>(m_vec.size()); }

	// Read-only access to the element at index.
	const T& GetAt(int index) const { return m_vec[index]; }

	// Erases the element at index; later elements move down by one.
	void RemoveAt(int index) { m_vec.erase(m_vec.begin() + index); }

	// Inserts one copy of strVal in front of the element currently at index.
	void InsertAt(int index, const T& strVal) { m_vec.insert(m_vec.begin() + index, strVal); }

	// Inserts nCopies copies of strVal in front of the element currently at index.
	// A zero or negative count is a no-op; without this guard a negative int would be
	// converted to an enormous unsigned count and the insert would throw.
	void InsertAt(int index, const T& strVal, int nCopies)
	{
		if (nCopies <= 0)
			return;
		m_vec.insert(m_vec.begin() + index, static_cast<typename std::vector<T>::size_type>(nCopies), strVal);
	}

	// Overwrites the element at index with strVal.
	void SetAt(int index, const T& strVal) { m_vec[index] = strVal; }

	// Appends strVal. Capacity starts at 256 and doubles whenever the vector is full.
	void Add(const T& strVal)
	{
		if (m_vec.size() == m_vec.capacity())
		{
			// strVal may refer to an element of this very array (e.g. Add(GetAt(0))).
			// reserve() reallocates and would leave that reference dangling, so take
			// a private copy before growing.
			const T held(strVal);
			m_vec.reserve(m_vec.capacity() ? m_vec.capacity() * 2 : 256);
			m_vec.push_back(held);
			return;
		}
		m_vec.push_back(strVal);
	}

	// Removes all elements.
	void RemoveAll() { m_vec.clear(); }

	// Pre-allocates room for nHintSize elements. Non-positive hints are ignored,
	// since a negative int would otherwise wrap to a huge unsigned request.
	void Reserve(int nHintSize)
	{
		if (nHintSize > 0)
			m_vec.reserve(static_cast<typename std::vector<T>::size_type>(nHintSize));
	}

private:
	std::vector<T> m_vec;
};
// A template class to make an array which looks like a CStringArray or CDWORDArray but
// is in fact based on an STL deque, which is much faster at large sizes.
//
// Indices are passed straight through to the deque and are NOT range-checked;
// callers must keep them inside [0, GetCount()) (or [0, GetCount()] for InsertAt).
template <typename T> class CStdArrayD
{
public:
	// Number of stored elements, narrowed to int to mirror the MFC array interface.
	int GetCount() const { return static_cast<int>(m_vec.size()); }

	// Read-only access to the element at index.
	const T& GetAt(int index) const { return m_vec[index]; }

	// Erases the element at index.
	void RemoveAt(int index) { m_vec.erase(m_vec.begin() + index); }

	// Inserts one copy of strVal in front of the element currently at index.
	void InsertAt(int index, const T& strVal) { m_vec.insert(m_vec.begin() + index, strVal); }

	// Inserts nCopies copies of strVal in front of the element currently at index.
	// A zero or negative count is a no-op; without this guard a negative int would be
	// converted to an enormous unsigned count and the insert would throw.
	void InsertAt(int index, const T& strVal, int nCopies)
	{
		if (nCopies <= 0)
			return;
		m_vec.insert(m_vec.begin() + index, static_cast<typename std::deque<T>::size_type>(nCopies), strVal);
	}

	// Overwrites the element at index with strVal.
	void SetAt(int index, const T& strVal) { m_vec[index] = strVal; }

	// Appends strVal at the end.
	void Add(const T& strVal) { m_vec.push_back(strVal); }

	// Removes all elements.
	void RemoveAll() { m_vec.clear(); }

private:
	std::deque<T> m_vec; // a deque despite the historical "vec" name
};
68 using CStdDWORDArray = CStdArrayV<DWORD>;
70 struct CFileTextLine {
71 CString sLine;
72 EOL eEnding = EOL::AutoLine;
74 using CStdFileLineArray = CStdArrayD<CFileTextLine>;
75 /**
76 * \ingroup TortoiseMerge
78 * Represents an array of text lines which are read from a file.
79 * This class is also responsible for determining the encoding of
80 * the file (e.g. UNICODE(UTF16), UTF8, ASCII, ...).
82 class CFileTextLines : public CStdFileLineArray
84 public:
85 CFileTextLines();
86 ~CFileTextLines();
88 enum class UnicodeType
90 AUTOTYPE,
91 BINARY,
92 ASCII,
93 UTF16_LE, //=1200,
94 UTF16_BE, //=1201,
95 UTF16_LEBOM, //=1200,
96 UTF16_BEBOM, //=1201,
97 UTF32_LE, //=12000,
98 UTF32_BE, //=12001,
99 UTF8, //=65001,
100 UTF8BOM, //=UTF8+65536,
103 struct SaveParams {
104 UnicodeType m_UnicodeType = CFileTextLines::UnicodeType::AUTOTYPE;
105 EOL m_LineEndings = EOL::AutoLine;
109 * Loads the text file and adds each line to the array
110 * \param sFilePath the path to the file
111 * \param lengthHint hint to create line array
113 BOOL Load(const CString& sFilePath, int lengthHint = 0);
115 * Saves the whole array of text lines to a file, preserving
116 * the line endings detected at Load()
117 * \param sFilePath the path to save the file to
118 * \param bSaveAsUTF8 enforce encoding for save
119 * \param bUseSVNCompatibleEOLs limit EOLs to CRLF, CR and LF, last one is used instead of all others
120 * \param dwIgnoreWhitespaces "enum" mode of removing whitespaces
121 * \param bIgnoreCase converts whole file to lower case
122 * \param bBlame limit line len
124 BOOL Save(const CString& sFilePath
125 , bool bSaveAsUTF8 = false
126 , bool bUseSVNCompatibleEOLs = false
127 , DWORD dwIgnoreWhitespaces = 0
128 , BOOL bIgnoreCase = FALSE
129 , bool bBlame = false
130 , bool bIgnoreComments = false
131 , const CString& linestart = CString()
132 , const CString& blockstart = CString()
133 , const CString& blockend = CString()
134 , const std::wregex& rx = std::wregex()
135 , const std::wstring& replacement = L"");
137 * Returns an error string of the last failed operation
139 CString GetErrorString() const {return m_sErrorString;}
141 * Copies the settings of a file like the line ending styles
142 * to another CFileTextLines object.
144 void CopySettings(CFileTextLines * pFileToCopySettingsTo) const;
146 bool NeedsConversion() const { return m_bNeedsConversion; }
147 UnicodeType GetUnicodeType() const {return m_SaveParams.m_UnicodeType;}
148 EOL GetLineEndings() const {return m_SaveParams.m_LineEndings;}
150 void Add(const CString& sLine, EOL ending) { CFileTextLine temp={sLine, ending}; CStdFileLineArray::Add(temp); }
151 void InsertAt(int index, const CString& strVal, EOL ending) { CFileTextLine temp={strVal, ending}; CStdFileLineArray::InsertAt(index, temp); }
153 const CString& GetAt(int index) const { return CStdFileLineArray::GetAt(index).sLine; }
154 EOL GetLineEnding(int index) const { return CStdFileLineArray::GetAt(index).eEnding; }
155 void SetSaveParams(const SaveParams& sp) { m_SaveParams = sp; }
156 SaveParams GetSaveParams() const { return m_SaveParams; }
157 void KeepEncoding(bool bKeep = true) { m_bKeepEncoding = bKeep; }
158 //void SetLineEnding(int index, EOL ending) { CStdFileLineArray::GetAt(index).eEnding = ending; }
160 static const wchar_t * GetEncodingName(UnicodeType);
163 * Checks the Unicode type in a text buffer
164 * Must be public for TortoiseGitBlame
165 * \param pBuffer pointer to the buffer containing text
166 * \param cb size of the text buffer in bytes
168 UnicodeType CheckUnicodeType(LPCVOID pBuffer, int cb);
170 private:
171 void SetErrorString();
173 static void StripWhiteSpace(CString& sLine, DWORD dwIgnoreWhitespaces, bool blame);
174 bool StripComments(CString& sLine, bool bInBlockComment);
175 bool IsInsideString(const CString& sLine, int pos);
176 void LineRegex(CString& sLine, const std::wregex& rx, const std::wstring& replacement) const;
179 private:
180 CString m_sErrorString;
181 bool m_bNeedsConversion = false;
182 bool m_bKeepEncoding = false;
183 SaveParams m_SaveParams;
184 CString m_sCommentLine;
185 CString m_sCommentBlockStart;
186 CString m_sCommentBlockEnd;
191 class CBuffer
193 public:
194 CBuffer() = default;
195 CBuffer(const CBuffer& Src) { Copy(Src); }
196 CBuffer(const CBuffer* const Src) { Copy(*Src); }
197 ~CBuffer() {Free(); }
199 CBuffer& operator=(const CBuffer& Src) { Copy(Src); return *this; }
200 operator bool () const { return !IsEmpty(); }
201 operator void*() const { return static_cast<void*>(m_pBuffer); }
202 operator LPSTR() const { return reinterpret_cast<LPSTR>(m_pBuffer); }
204 void Clear() { m_nUsed=0; }
205 void ExpandToAtLeast(int nNewSize);
206 int GetLength() const { return m_nUsed; }
207 bool IsEmpty() const { return GetLength()==0; }
208 void SetLength(int nUsed);
209 void Swap(CBuffer& Src) noexcept;
211 private:
212 void Copy(const CBuffer & Src);
213 void Free() { delete [] m_pBuffer; }
215 BYTE* m_pBuffer = nullptr;
216 int m_nUsed = 0;
217 int m_nAllocated = 0;
220 class CDecodeFilter
222 public:
223 CDecodeFilter() = default;
224 CDecodeFilter(const CDecodeFilter&) = delete;
225 CDecodeFilter& operator=(const CDecodeFilter& Src) = delete;
226 virtual ~CDecodeFilter()
228 m_deleter(m_pBuffer);
231 virtual bool Decode(std::unique_ptr<BYTE[]> s, int len) = 0;
232 std::wstring_view GetStringView() const
234 if (m_iBufferLength == 0)
235 return {};
236 return std::wstring_view(m_pBuffer, m_iBufferLength);
239 protected:
240 wchar_t* m_pBuffer = nullptr;
241 int m_iBufferLength = 0;
242 std::function<void(void*)> m_deleter = [](void* ptr) { delete[] static_cast<wchar_t*>(ptr); };
245 class CEncodeFilter
247 public:
248 CEncodeFilter(CStdioFile* p_File)
249 : m_pFile(p_File)
252 virtual ~CEncodeFilter()
256 virtual const CBuffer& Encode(const CString& data) = 0;
257 const CBuffer & GetBuffer() const {return m_oBuffer; }
258 void Write(const CString& s) { Write(Encode(s)); } ///< encode into buffer and write
259 void Write() { Write(m_oBuffer); } ///< write preencoded internal buffer
260 void Write(const CBuffer & buffer) { if (buffer.GetLength()) m_pFile->Write(static_cast<void*>(buffer), buffer.GetLength()); } ///< write preencoded buffer
262 protected:
263 CBuffer m_oBuffer;
265 private:
266 CStdioFile * m_pFile;
270 class CAsciiFilter : public CEncodeFilter, public CDecodeFilter
272 public:
273 CAsciiFilter(CStdioFile* pFile)
274 : CEncodeFilter(pFile)
275 , m_nCodePage(CP_ACP)
278 bool Decode(std::unique_ptr<BYTE[]> data, int len) override;
279 const CBuffer& Encode(const CString& data) override;
281 protected:
283 Code page for MultiByteToWideChar.
285 UINT m_nCodePage;
289 class CUtf8Filter : public CAsciiFilter
291 public:
292 CUtf8Filter(CStdioFile* pFile)
293 : CAsciiFilter(pFile)
295 m_nCodePage = CP_UTF8;
300 class CUtf16leFilter : public CEncodeFilter, public CDecodeFilter
302 public:
303 CUtf16leFilter(CStdioFile* pFile)
304 : CEncodeFilter(pFile)
307 bool Decode(std::unique_ptr<BYTE[]> data, int len) override;
308 const CBuffer& Encode(const CString& s) override;
312 class CUtf16beFilter : public CUtf16leFilter
314 public:
315 CUtf16beFilter(CStdioFile *pFile) : CUtf16leFilter(pFile){}
317 bool Decode(std::unique_ptr<BYTE[]> data, int len) override;
318 const CBuffer& Encode(const CString& s) override;
322 class CUtf32leFilter : public CEncodeFilter, public CDecodeFilter
324 public:
325 CUtf32leFilter(CStdioFile* pFile)
326 : CEncodeFilter(pFile)
329 bool Decode(std::unique_ptr<BYTE[]> data, int len) override;
330 const CBuffer& Encode(const CString& s) override;
334 class CUtf32beFilter : public CUtf32leFilter
336 public:
337 CUtf32beFilter(CStdioFile *pFile) : CUtf32leFilter(pFile){}
339 bool Decode(std::unique_ptr<BYTE[]> data, int len) override;
340 const CBuffer& Encode(const CString& s) override;