Merging the SubLib project with Gnome Subtitles
[gn-sub.git] / src / SubLib / IO / Input / SubtitleInput.cs
blobf60f86398d7b986f07d5fbc2192e049fef212f0c
1 /*
2 * This file is part of SubLib.
3 * Copyright (C) 2005-2008 Pedro Castro
5 * SubLib is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * SubLib is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 using SubLib.Core.Domain;
21 using SubLib.Exceptions;
22 using SubLib.IO.Output;
23 using SubLib.IO.SubtitleFormats;
24 using System;
25 using System.IO;
26 using System.Text;
28 namespace SubLib.IO.Input {
30 internal class SubtitleInput {
31 private Encoding fallbackEncoding = null;
32 private SubtitleType subtitleType = SubtitleType.Unknown;
/// <summary>Creates a new subtitle input reader.</summary>
/// <param name="fallbackEncoding">The encoding to fall back to when no code page is detected for a file.</param>
/// <param name="subtitleType">The subtitle type to use. When it is <see cref="SubtitleType.Unknown" />, the format is auto-detected from the text.</param>
internal SubtitleInput (Encoding fallbackEncoding, SubtitleType subtitleType) {
	this.subtitleType = subtitleType;
	this.fallbackEncoding = fallbackEncoding;
}
/// <summary>Reads a subtitle file, detecting both its encoding and its subtitle format.</summary>
/// <param name="path">The path of the file to read.</param>
/// <param name="encoding">The encoding that was used to read the file.</param>
/// <param name="format">The subtitle format that was found for the text.</param>
/// <returns>The text read from the file.</returns>
/// <exception cref="EncodingNotSupportedException">Thrown if the encoding is not supported by the platform.</exception>
/// <exception cref="UnknownSubtitleFormatException">Thrown if the subtitle format could not be detected.</exception>
internal string Read (string path, out Encoding encoding, out SubtitleFormat format) {
	/* The stream is disposed when leaving the block, whether by return or by exception,
	 * so the file handle is never left open. */
	using (FileStream fileStream = FileInputOutput.OpenFileForReading(path)) {
		return ReadSubtitleText(true, fileStream, out encoding, out format);
	}
}
/// <summary>Reads a subtitle file using the given encoding, and determines its subtitle format.</summary>
/// <param name="path">The path of the file to read.</param>
/// <param name="encoding">The encoding to read the file with.</param>
/// <param name="format">The subtitle format that was found for the text.</param>
/// <returns>The text read from the file.</returns>
/// <exception cref="UnknownSubtitleFormatException">Thrown if the subtitle format could not be detected.</exception>
internal string Read (string path, Encoding encoding, out SubtitleFormat format) {
	/* The stream is disposed when leaving the block, whether by return or by exception,
	 * so the file handle is never left open. */
	using (FileStream fileStream = FileInputOutput.OpenFileForReading(path)) {
		return TestEncoding(fileStream, encoding, out format);
	}
}
/// <summary>Reads a plain text file, detecting its encoding. No subtitle format detection is requested.</summary>
/// <param name="path">The path of the file to read.</param>
/// <param name="encoding">The encoding that was used to read the file.</param>
/// <returns>The text read from the file.</returns>
/// <exception cref="EncodingNotSupportedException">Thrown if the encoding is not supported by the platform.</exception>
internal string ReadPlain (string path, out Encoding encoding) {
	/* The stream is disposed when leaving the block, whether by return or by exception,
	 * so the file handle is never left open. */
	using (FileStream fileStream = FileInputOutput.OpenFileForReading(path)) {
		/* Only needed to satisfy the out parameter; its value is discarded */
		SubtitleFormat format;
		return ReadSubtitleText(false, fileStream, out encoding, out format);
	}
}
/// <summary>Reads a plain text file using the given encoding.</summary>
/// <param name="path">The path of the file to read.</param>
/// <param name="encoding">The encoding to read the file with.</param>
/// <returns>The text read from the file.</returns>
/// <exception cref="EncodingNotSupportedException">Thrown if the encoding is not supported by the platform.</exception>
internal string ReadPlain (string path, Encoding encoding) {
	/* The stream is disposed when leaving the block, whether by return or by exception,
	 * so the file handle is never left open. */
	using (FileStream fileStream = FileInputOutput.OpenFileForReading(path)) {
		return TestEncoding(fileStream, encoding);
	}
}
75 /* Private methods */
/// <summary>Reads the text of a file, trying the detected code pages in order of probability.</summary>
/// <remarks>When no code page is detected, the fall-back encoding is used instead. When every detected
/// code page fails, the error raised for the first (most probable) code page is the one thrown.</remarks>
/// <param name="isSubtitleFile">Whether it is a subtitle file (the format is also determined) or a plain text one.</param>
/// <param name="fileStream">The stream for reading the file.</param>
/// <param name="usedEncoding">The encoding the text was read with.</param>
/// <param name="usedFormat">The subtitle format found. Only set for subtitle files.</param>
/// <returns>The text read from the stream.</returns>
/// <exception cref="EncodingNotSupportedException">Thrown if the encoding is not supported by the platform.</exception>
/// <exception cref="UnknownSubtitleFormatException">Thrown if the subtitle format could not be detected.</exception>
private string ReadSubtitleText (bool isSubtitleFile, FileStream fileStream, out Encoding usedEncoding, out SubtitleFormat usedFormat) {
	/* Out arguments start as null */
	usedEncoding = null;
	usedFormat = null;

	int[] detected = FileInputOutput.DetectCodePages(fileStream);

	/* Nothing detected: the fall-back encoding is the only candidate */
	if (detected.Length == 0) {
		VerboseConsole.WriteLine("No encoding was automatically detected. Using the fall-back encoding: " + fallbackEncoding.WebName);
		string fallbackText = isSubtitleFile
			? TestEncoding(fileStream, fallbackEncoding, out usedFormat)
			: TestEncoding(fileStream, fallbackEncoding);
		usedEncoding = fallbackEncoding;
		return fallbackText;
	}

	/* The first code page is the most probable one, so its failure (if any) is remembered
	 * and reported in case none of the remaining code pages works either. */
	Exception encodingFailure = null;
	Exception formatFailure = null;
	try {
		return isSubtitleFile
			? TestCodePage(fileStream, detected[0], out usedEncoding, out usedFormat)
			: TestCodePagePlain(fileStream, detected[0], out usedEncoding);
	}
	catch (EncodingNotSupportedException e) {
		encodingFailure = e;
	}
	catch (UnknownSubtitleFormatException e) {
		formatFailure = e;
	}

	/* The most probable code page failed, so go through the remaining candidates */
	for (int index = 1 ; index < detected.Length ; index++) {
		try {
			int candidate = detected[index];
			return isSubtitleFile
				? TestCodePage(fileStream, candidate, out usedEncoding, out usedFormat)
				: TestCodePagePlain(fileStream, candidate, out usedEncoding);
		}
		catch (Exception) {
			//Deliberately ignored, the next code page will be tried
		}
	}

	/* Every candidate failed: report what went wrong with the first one */
	if (encodingFailure != null)
		throw encodingFailure;

	throw formatFailure;
}
/// <summary>Resolves a code page to an encoding, then reads the text and determines the subtitle format with it.</summary>
/// <param name="fileStream">The stream for reading the file.</param>
/// <param name="codePage">The code page to try.</param>
/// <param name="encoding">The encoding that corresponds to the code page.</param>
/// <param name="format">The subtitle format found for the text.</param>
/// <returns>The text read from the stream.</returns>
/// <exception cref="EncodingNotSupportedException">Thrown if the encoding is not supported by the platform.</exception>
/// <exception cref="UnknownSubtitleFormatException">Thrown if the subtitle format could not be detected.</exception>
private string TestCodePage (FileStream fileStream, int codePage, out Encoding encoding, out SubtitleFormat format) {
	/* The code page must map to a usable encoding before any reading takes place */
	TestCodePageCommon(codePage, out encoding);

	string text = TestEncoding(fileStream, encoding, out format);
	return text;
}
/// <summary>Resolves a code page to an encoding, then reads the text with it. No subtitle format is determined.</summary>
/// <param name="fileStream">The stream for reading the file.</param>
/// <param name="codePage">The code page to try.</param>
/// <param name="encoding">The encoding that corresponds to the code page.</param>
/// <returns>The text read from the stream.</returns>
/// <exception cref="EncodingNotSupportedException">Thrown if the encoding is not supported by the platform.</exception>
private string TestCodePagePlain (FileStream fileStream, int codePage, out Encoding encoding) {
	/* The code page must map to a usable encoding before any reading takes place */
	TestCodePageCommon(codePage, out encoding);

	string text = TestEncoding(fileStream, encoding);
	return text;
}
/// <summary>Gets the encoding that corresponds to a code page.</summary>
/// <param name="codePage">The code page to resolve.</param>
/// <param name="encoding">The resolved encoding.</param>
/// <exception cref="EncodingNotSupportedException">Thrown if any error occurs while getting the encoding for the code page.</exception>
private void TestCodePageCommon (int codePage, out Encoding encoding) {
	Encoding resolved;
	try {
		resolved = Encoding.GetEncoding(codePage);
	}
	catch (Exception) {
		/* Any failure to obtain the encoding is reported the same way */
		throw new EncodingNotSupportedException();
	}
	encoding = resolved;
}
/// <summary>Reads the text with the given encoding and determines its subtitle format.</summary>
/// <param name="fileStream">The stream for reading the file.</param>
/// <param name="encoding">The encoding to read the text with.</param>
/// <param name="format">The subtitle format found for the text.</param>
/// <returns>The text read from the stream.</returns>
/// <exception cref="UnknownSubtitleFormatException">Thrown if the subtitle format could not be detected.</exception>
private string TestEncoding (FileStream fileStream, Encoding encoding, out SubtitleFormat format) {
	/* Read first, the format is determined from the decoded text */
	string content = TestEncoding(fileStream, encoding);
	format = GetSubtitleFormat(content);
	return content;
}
/// <summary>Logs the encoding being tried and reads the text from the stream with it.</summary>
/// <param name="fileStream">The stream for reading the file.</param>
/// <param name="encoding">The encoding to read the text with.</param>
/// <returns>The text read from the stream.</returns>
private string TestEncoding (FileStream fileStream, Encoding encoding) {
	VerboseConsole.WriteLine("Trying the encoding " + encoding.WebName);

	/* NOTE(review): the meaning of the last argument is defined by FileInputOutput.ReadFile - confirm there */
	return FileInputOutput.ReadFile(fileStream, encoding, true);
}
/// <summary>Gets the subtitle format for the text.</summary>
/// <remarks>When the subtitle type is <see cref="SubtitleType.Unknown" /> the format is auto-detected
/// from the text, otherwise the format registered for the subtitle type is used.</remarks>
/// <param name="text">The subtitle text, used for auto-detection.</param>
/// <returns>The subtitle format.</returns>
/// <exception cref="UnknownSubtitleFormatException">Thrown if the subtitle format could not be detected.</exception>
private SubtitleFormat GetSubtitleFormat (string text) {
	if (subtitleType == SubtitleType.Unknown) {
		VerboseConsole.WriteLine("Trying to autodetect the subtitle format.");
		return BuiltInSubtitleFormats.Detect(text);
	}

	/* A specific type was requested, so the text is not inspected */
	VerboseConsole.WriteLine("Trying the subtitle format " + subtitleType);
	return BuiltInSubtitleFormats.GetFormat(subtitleType);
}