Bugfix: PGS YVU => YUV
[xy_vsfilter.git] / src / subtitles / TextFile.cpp
blob4fd129e270d45f877c07de106202d0a2356eaae2
1 /*
2 * Copyright (C) 2003-2006 Gabest
3 * http://www.gabest.org
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
8 * any later version.
9 *
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with GNU Make; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 * http://www.gnu.org/copyleft/gpl.html
22 #include "stdafx.h"
23 #include <atlbase.h>
24 #include <afxinet.h>
25 #include "TextFile.h"
26 #include "Utf8.h"
28 CTextFile::CTextFile(enc e)
29 : m_encoding(e)
30 , m_defaultencoding(e)
31 , m_offset(0)
35 bool CTextFile::Open(LPCTSTR lpszFileName)
37 if (!__super::Open(lpszFileName, modeRead | typeBinary | shareDenyNone)) {
38 return false;
41 m_encoding = m_defaultencoding;
42 m_offset = 0;
44 if(__super::GetLength() >= 2)
46 WORD w;
47 if(sizeof(w) != Read(&w, sizeof(w)))
48 return Close(), false;
50 if(w == 0xfeff)
52 m_encoding = LE16;
53 m_offset = 2;
55 else if(w == 0xfffe)
57 m_encoding = BE16;
58 m_offset = 2;
60 else if(w == 0xbbef && __super::GetLength() >= 3)
62 BYTE b;
63 if(sizeof(b) != Read(&b, sizeof(b)))
64 return Close(), false;
66 if(b == 0xbf)
68 m_encoding = UTF8;
69 m_offset = 3;
74 if (m_encoding == ASCII) {
75 if (!ReopenAsText()) {
76 return false;
78 } else if (m_offset == 0) { // No BOM detected, ensure the file is read from the beginning
79 Seek(0, begin);
82 return true;
85 bool CTextFile::ReopenAsText()
87 __super::Close(); // CWebTextFile::Close() would delete the temp file if we called it...
89 return __super::Open(m_strFileName, modeRead | typeText | shareDenyNone)==TRUE;
92 bool CTextFile::Save(LPCTSTR lpszFileName, enc e)
94 if(!__super::Open(lpszFileName, modeCreate|modeWrite|shareDenyWrite|(e==ASCII?typeText:typeBinary)))
95 return(false);
97 if(e == UTF8)
99 BYTE b[3] = {0xef,0xbb,0xbf};
100 Write(b, sizeof(b));
102 else if(e == LE16)
104 BYTE b[2] = {0xff,0xfe};
105 Write(b, sizeof(b));
107 else if(e == BE16)
109 BYTE b[2] = {0xfe,0xff};
110 Write(b, sizeof(b));
113 m_encoding = e;
115 return true;
118 void CTextFile::SetEncoding(enc e)
120 m_encoding = e;
123 CTextFile::enc CTextFile::GetEncoding()
125 return m_encoding;
128 bool CTextFile::IsUnicode()
130 return m_encoding == UTF8 || m_encoding == LE16 || m_encoding == BE16;
133 // CFile
135 CString CTextFile::GetFilePath() const
137 // to avoid a CException coming from CTime
138 return m_strFileName; // __super::GetFilePath();
141 // CStdioFile
143 ULONGLONG CTextFile::GetPosition() const
145 return(CStdioFile::GetPosition() - m_offset);
148 ULONGLONG CTextFile::GetLength() const
150 return(CStdioFile::GetLength() - m_offset);
153 ULONGLONG CTextFile::Seek(LONGLONG lOff, UINT nFrom)
155 ULONGLONG pos = GetPosition();
156 ULONGLONG len = GetLength();
158 switch(nFrom)
160 default:
161 case begin: lOff = lOff; break;
162 case current: lOff = pos + lOff; break;
163 case end: lOff = len - lOff; break;
166 lOff = max(min((ULONGLONG)lOff, len), 0) + m_offset;
168 pos = CStdioFile::Seek(lOff, begin) - m_offset;
170 return(pos);
173 void CTextFile::WriteString(LPCSTR lpsz/*CStringA str*/)
175 CStringA str(lpsz);
177 if(m_encoding == ASCII)
179 __super::WriteString(AToT(str));
181 else if(m_encoding == ANSI)
183 str.Replace("\n", "\r\n");
184 Write((LPCSTR)str, str.GetLength());
186 else if(m_encoding == UTF8)
188 WriteString(AToW(str));
190 else if(m_encoding == LE16)
192 WriteString(AToW(str));
194 else if(m_encoding == BE16)
196 WriteString(AToW(str));
200 void CTextFile::WriteString(LPCWSTR lpsz/*CStringW str*/)
202 CStringW str(lpsz);
204 if(m_encoding == ASCII)
206 __super::WriteString(WToT(str));
208 else if(m_encoding == ANSI)
210 str.Replace(L"\n", L"\r\n");
211 CStringA stra = CStringA(CString(str)); // TODO: codepage
212 Write((LPCSTR)stra, stra.GetLength());
214 else if(m_encoding == UTF8)
216 str.Replace(L"\n", L"\r\n");
217 for (unsigned int i = 0, l = str.GetLength(); i < l; i++)
219 DWORD c = (WORD)str[i];
221 if(0 <= c && c < 0x80) // 0xxxxxxx
223 Write(&c, 1);
225 else if(0x80 <= c && c < 0x800) // 110xxxxx 10xxxxxx
227 c = 0xc080|((c<<2)&0x1f00)|(c&0x003f);
228 Write((BYTE*)&c+1, 1);
229 Write(&c, 1);
231 else if(0x800 <= c && c < 0xFFFF) // 1110xxxx 10xxxxxx 10xxxxxx
233 c = 0xe08080|((c<<4)&0x0f0000)|((c<<2)&0x3f00)|(c&0x003f);
234 Write((BYTE*)&c+2, 1);
235 Write((BYTE*)&c+1, 1);
236 Write(&c, 1);
238 else
240 c = '?';
241 Write(&c, 1);
245 else if(m_encoding == LE16)
247 str.Replace(L"\n", L"\r\n");
248 Write((LPCWSTR)str, str.GetLength()*2);
250 else if(m_encoding == BE16)
252 str.Replace(L"\n", L"\r\n");
253 for (unsigned int i = 0, l = str.GetLength(); i < l; i++) {
254 str.SetAt(i, ((str[i] >> 8) & 0x00ff) | ((str[i] << 8) & 0xff00));
256 Write((LPCWSTR)str, str.GetLength() * 2);
260 BOOL CTextFile::ReadString(CStringA& str)
262 bool fEOF = true;
264 str.Empty();
266 if(m_encoding == ASCII)
268 CString s;
269 fEOF = !__super::ReadString(s);
270 str = TToA(s);
271 // For consistency with other encodings, we continue reading
272 // the file even when a NUL char is encountered.
273 char c;
274 while (fEOF && (Read(&c, sizeof(c)) == sizeof(c))) {
275 str += c;
276 fEOF = !__super::ReadString(s);
277 str += TToA(s);
280 else if(m_encoding == ANSI)
282 char c;
283 while(Read(&c, sizeof(c)) == sizeof(c))
285 fEOF = false;
286 if(c == '\r') continue;
287 if(c == '\n') break;
288 str += c;
291 else if(m_encoding == UTF8)
293 int nBytesRead = 0;
294 BYTE buffer[3];
295 bool bValid = true;
297 while (Read(&buffer[0], sizeof(buffer[0])) == sizeof(buffer[0]))
299 nBytesRead++;
300 fEOF = false;
301 char c = '?';
303 if (Utf8::isSingleByte(buffer[0]))
304 { // 0xxxxxxx
305 c = buffer[0] & 0x7f;
307 else if (Utf8::isFirstOfMultibyte(buffer[0]))
309 int nContinuationBytes = Utf8::continuationBytes(buffer[0]);
310 bValid = (nContinuationBytes <= 2);
312 // We don't support characters wider than 16 bits
313 if (bValid) {
314 UINT nRead = Read(&buffer[1], nContinuationBytes * sizeof(buffer[1]));
315 nBytesRead += nContinuationBytes;
316 bValid = (nRead == nContinuationBytes * sizeof(buffer[1]));
318 if (bValid) {
319 for (int i = 0; i < nContinuationBytes; i++) {
320 if (!Utf8::isContinuation(buffer[i + 1])) {
321 bValid = false;
325 switch (nContinuationBytes) {
326 case 0: // 0xxxxxxx
327 c = buffer[0] & 0x7f;
328 break;
329 case 1: // 110xxxxx 10xxxxxx
330 case 2: // 1110xxxx 10xxxxxx 10xxxxxx
331 // Unsupported for non unicode strings
332 break;
337 else
339 bValid = false;
342 if (bValid)
344 if (c == '\r') {
345 continue;
347 if (c == '\n') {
348 break;
350 str += c;
352 else
354 // Switch to text and read again
355 m_encoding = ASCII;
356 // Rewind to the end of the line and save the position
357 Seek(-nBytesRead, current);
358 ULONGLONG currentPosition = GetPosition();
360 fEOF = !ReopenAsText();
362 if (!fEOF)
364 // Seek back at the beginning of the line where we stopped
365 Seek(currentPosition, begin);
367 fEOF = !ReadString(str);
370 break;
374 else if (m_encoding == LE16)
376 WORD w;
377 while(Read(&w, sizeof(w)) == sizeof(w))
379 fEOF = false;
380 char c = '?';
381 if(!(w&0xff00)) c = w&0xff;
382 if(c == '\r') continue;
383 if(c == '\n') break;
384 str += c;
387 else if(m_encoding == BE16)
389 WORD w;
390 while(Read(&w, sizeof(w)) == sizeof(w))
392 fEOF = false;
393 char c = '?';
394 if(!(w&0xff)) c = w>>8;
395 if(c == '\r') continue;
396 if(c == '\n') break;
397 str += c;
401 return(!fEOF);
404 BOOL CTextFile::ReadString(CStringW& str)
406 bool fEOF = true;
408 str.Empty();
410 if(m_encoding == ASCII)
412 CString s;
413 fEOF = !__super::ReadString(s);
414 str = TToW(s);
415 // For consistency with other encodings, we continue reading
416 // the file even when a NUL char is encountered.
417 char c;
418 while (fEOF && (Read(&c, sizeof(c)) == sizeof(c))) {
419 str += c;
420 fEOF = !__super::ReadString(s);
421 str += TToW(s);
424 else if(m_encoding == ANSI)
426 CStringA stra;
427 char c;
428 while(Read(&c, sizeof(c)) == sizeof(c))
430 fEOF = false;
431 if(c == '\r') continue;
432 if(c == '\n') break;
433 stra += c;
435 str = CStringW(CString(stra)); // TODO: codepage
437 else if(m_encoding == UTF8)
439 int nBytesRead = 0;
440 BYTE buffer[3];
441 bool bValid = true;
443 while (Read(&buffer[0], sizeof(buffer[0])) == sizeof(buffer[0])) {
444 nBytesRead++;
445 fEOF = false;
446 WCHAR c = L'?';
448 if (Utf8::isSingleByte(buffer[0])) { // 0xxxxxxx
449 c = buffer[0] & 0x7f;
450 } else if (Utf8::isFirstOfMultibyte(buffer[0])) {
451 int nContinuationBytes = Utf8::continuationBytes(buffer[0]);
452 bValid = (nContinuationBytes <= 2);
454 // We don't support characters wider than 16 bits
455 if (bValid) {
456 UINT nRead = Read(&buffer[1], nContinuationBytes * sizeof(buffer[1]));
457 nBytesRead += nContinuationBytes;
458 bValid = (nRead == nContinuationBytes * sizeof(buffer[1]));
460 if (bValid) {
461 for (int i = 0; i < nContinuationBytes; i++) {
462 if (!Utf8::isContinuation(buffer[i + 1])) {
463 bValid = false;
467 switch (nContinuationBytes) {
468 case 0: // 0xxxxxxx
469 c = buffer[0] & 0x7f;
470 break;
471 case 1: // 110xxxxx 10xxxxxx
472 c = (buffer[0] & 0x1f) << 6 | (buffer[1] & 0x3f);
473 break;
474 case 2: // 1110xxxx 10xxxxxx 10xxxxxx
475 c = (buffer[0] & 0x0f) << 12 | (buffer[1] & 0x3f) << 6 | (buffer[2] & 0x3f);
476 break;
480 } else {
481 bValid = false;
484 if (bValid) {
485 if (c == '\r') {
486 continue;
488 if (c == '\n') {
489 break;
491 str += c;
492 } else {
493 // Switch to text and read again
494 m_encoding = ASCII;
495 // Rewind to the end of the line and save the position
496 Seek(-nBytesRead, current);
497 ULONGLONG currentPosition = GetPosition();
499 fEOF = !ReopenAsText();
501 if (!fEOF) {
502 // Seek back to the beginning of the line where we stopped
503 Seek(currentPosition, begin);
505 fEOF = !ReadString(str);
508 break;
512 else if(m_encoding == LE16)
514 WCHAR wc;
515 while(Read(&wc, sizeof(wc)) == sizeof(wc))
517 fEOF = false;
518 if(wc == '\r') continue;
519 if(wc == '\n') break;
520 str += wc;
523 else if(m_encoding == BE16)
525 WCHAR wc;
526 while(Read(&wc, sizeof(wc)) == sizeof(wc))
528 fEOF = false;
529 wc = ((wc>>8)&0x00ff)|((wc<<8)&0xff00);
530 if(wc == '\r') continue;
531 if(wc == '\n') break;
532 str += wc;
536 return(!fEOF);
539 UINT CTextFile::Read( void* lpBuf, UINT nCount )
541 return __super::Read(lpBuf,nCount);
544 // CWebTextFile
547 CWebTextFile::CWebTextFile(CTextFile::enc e, LONGLONG llMaxSize)
548 : CTextFile(e)
549 , m_llMaxSize(llMaxSize)
553 bool CWebTextFile::Open(LPCTSTR lpszFileName)
555 CString fn(lpszFileName);
557 if(fn.Find(_T("http://")) != 0)
558 return __super::Open(lpszFileName);
562 CInternetSession is;
564 CAutoPtr<CStdioFile> f(is.OpenURL(fn, 1, INTERNET_FLAG_TRANSFER_BINARY|INTERNET_FLAG_EXISTING_CONNECT));
565 if(!f) return(false);
567 TCHAR path[MAX_PATH];
568 GetTempPath(MAX_PATH, path);
570 fn = path + fn.Mid(fn.ReverseFind('/')+1);
571 int i = fn.Find(_T("?"));
572 if(i > 0) fn = fn.Left(i);
573 CFile temp;
574 if(!temp.Open(fn, modeCreate|modeWrite|typeBinary|shareDenyWrite))
576 f->Close();
577 return(false);
580 BYTE buff[1024];
581 int len, total = 0;
582 while((len = f->Read(buff, 1024)) == 1024 && (m_llMaxSize < 0 || (total+=1024) < m_llMaxSize))
583 temp.Write(buff, len);
584 if(len > 0) temp.Write(buff, len);
586 m_tempfn = fn;
588 f->Close(); // must close it because the desctructor doesn't seem to do it and we will get an exception when "is" is destroying
590 catch(CInternetException* ie)
592 ie->Delete();
593 return(false);
596 return __super::Open(m_tempfn);
599 bool CWebTextFile::Save(LPCTSTR lpszFileName, enc e)
601 // CWebTextFile is read-only...
602 ASSERT(0);
603 return(false);
606 void CWebTextFile::Close()
608 __super::Close();
610 if(!m_tempfn.IsEmpty())
612 _tremove(m_tempfn);
613 m_tempfn.Empty();
617 ///////////////////////////////////////////////////////////////
619 CStringW AToW(const CStringA& str)
621 CStringW ret;
622 for(int i = 0, j = str.GetLength(); i < j; i++)
623 ret += (WCHAR)(BYTE)str[i];
624 return(ret);
627 CStringA WToA(const CStringW& str)
629 CStringA ret;
630 for(int i = 0, j = str.GetLength(); i < j; i++)
631 ret += (CHAR)(WORD)str[i];
632 return(ret);
635 CString AToT(const CStringA& str)
637 CString ret;
638 for(int i = 0, j = str.GetLength(); i < j; i++)
639 ret += (TCHAR)(BYTE)str[i];
640 return(ret);
643 CString WToT(const CStringW& str)
645 #ifdef UNICODE
646 return str;
647 #else
648 CString ret;
649 for(int i = 0, j = str.GetLength(); i < j; i++)
650 ret += (TCHAR)(WORD)str[i];
651 return(ret);
652 #endif
655 CStringA TToA(const CString& str)
657 CStringA ret;
658 #ifdef UNICODE
659 for(int i = 0, j = str.GetLength(); i < j; i++)
660 ret += (CHAR)(BYTE)str[i];
661 #else
662 ret = str;
663 #endif
664 return(ret);
667 CStringW TToW(const CString& str)
669 CStringW ret;
670 #ifdef UNICODE
671 ret = str;
672 #else
673 for(size_t i = 0, j = str.GetLength(); i < j; i++)
674 ret += (WCHAR)(BYTE)str[i];
675 #endif
676 return(ret);