1 // HtmlAgilityPack V1.0 - Simon Mourier <simon underscore mourier at hotmail dot com>
6 namespace HtmlAgilityPack
9 /// Represents a document with mixed code and text. ASP, ASPX, and JSP files are good examples of such documents.
11 public class MixedCodeDocument
// Code fragments of the document; exposed through CodeFragments and cleared at the start of Load(TextReader).
16 internal MixedCodeDocumentFragmentList _codefragments
;
// Fragment the parser is currently filling in; its Index/Length/Line are updated as parsing advances.
17 private MixedCodeDocumentFragment _currentfragment
;
// All fragments of the document; exposed through Fragments and enumerated when building the template code.
18 internal MixedCodeDocumentFragmentList _fragments
;
// Parser position within the current line; copied onto each new fragment by SetPosition and
// advanced by TokenCodeEnd.Length when a code block is closed.
21 private int _lineposition
;
// Current parser state (ParseState.Text or ParseState.Code).
22 private ParseState _state
;
// Encoding taken from StreamReader.CurrentEncoding during Load(TextReader); GetOutEncoding falls back
// to Encoding.Default when this is null.
23 private Encoding _streamencoding
;
// Full document text, as returned by reader.ReadToEnd() in Load(TextReader).
24 internal string _text
;
// Text fragments of the document; exposed through TextFragments and cleared at the start of Load(TextReader).
25 internal MixedCodeDocumentFragmentList _textfragments
;
// The parser compares substrings of the loaded text against this token while in the Code state
// to detect the end of a code block.
28 /// Gets or sets the token representing code end.
30 public string TokenCodeEnd
= "%>";
// The parser compares substrings of the loaded text against this token while in the Text state
// to detect the start of a code block.
33 /// Gets or sets the token representing code start.
35 public string TokenCodeStart
= "<%";
// NOTE(review): not referenced anywhere in the visible portion of this file - confirm where it is consumed.
38 /// Gets or sets the token representing code directive.
40 public string TokenDirective
= "@";
// Prefix emitted in front of each text-fragment placeholder when the template code is built.
43 /// Gets or sets the token representing response write directive.
45 public string TokenResponseWrite
= "Response.Write ";
// Format string for the text-fragment placeholder; {0} is filled via string.Format(TokenTextBlock, i)
// when the template code is built (presumably i is the running text-block index - confirm).
48 private string TokenTextBlock
= "TextBlock({0})";
/// <summary>
/// Creates a mixed code document instance and allocates its three fragment lists
/// (code, text, and all fragments), each constructed with this document.
/// </summary>
public MixedCodeDocument()
{
    _codefragments = new MixedCodeDocumentFragmentList(this);
    _textfragments = new MixedCodeDocumentFragmentList(this);
    _fragments = new MixedCodeDocumentFragmentList(this);
}
// Builds the template code by walking every fragment in document order and appending to `s`.
// NOTE(review): the property header, the declarations of `s` and `i`, the switch header, the
// increments/breaks and the return are not visible in this excerpt - confirm against the full file.
// NOTE(review): `s` is grown with += inside the loop; a StringBuilder would avoid repeated copying
// on documents with many fragments.
69 /// Gets the code represented by the mixed code document seen as a template.
77 foreach (MixedCodeDocumentFragment frag
in _fragments
)
// Text fragment: emit TokenResponseWrite followed by the TokenTextBlock placeholder formatted with i, then a newline.
81 case MixedCodeDocumentFragmentType
.Text
:
82 s
+= TokenResponseWrite
+ string.Format(TokenTextBlock
, i
) + "\n";
// Code fragment: emit the fragment's Code verbatim, then a newline.
86 case MixedCodeDocumentFragmentType
.Code
:
87 s
+= ((MixedCodeDocumentCodeFragment
) frag
).Code
+ "\n";
/// <summary>
/// Gets the list of code fragments in the document.
/// </summary>
public MixedCodeDocumentFragmentList CodeFragments
{
    get
    {
        return _codefragments;
    }
}
/// <summary>
/// Gets the list of all fragments in the document.
/// </summary>
public MixedCodeDocumentFragmentList Fragments
{
    get
    {
        return _fragments;
    }
}
/// <summary>
/// Gets the encoding of the stream used to read the document.
/// </summary>
public Encoding StreamEncoding
{
    get
    {
        return _streamencoding;
    }
}
/// <summary>
/// Gets the list of text fragments in the document.
/// </summary>
public MixedCodeDocumentFragmentList TextFragments
{
    get
    {
        return _textfragments;
    }
}
129 #region Public Methods
/// <summary>
/// Creates a code fragment instance.
/// </summary>
/// <returns>The newly created code fragment instance.</returns>
public MixedCodeDocumentCodeFragment CreateCodeFragment()
{
    MixedCodeDocumentFragment created = CreateFragment(MixedCodeDocumentFragmentType.Code);
    return (MixedCodeDocumentCodeFragment) created;
}
/// <summary>
/// Creates a text fragment instance.
/// </summary>
/// <returns>The newly created text fragment instance.</returns>
public MixedCodeDocumentTextFragment CreateTextFragment()
{
    MixedCodeDocumentFragment created = CreateFragment(MixedCodeDocumentFragmentType.Text);
    return (MixedCodeDocumentTextFragment) created;
}
/// <summary>
/// Loads a mixed code document from a stream.
/// </summary>
/// <param name="stream">The input stream.</param>
public void Load(Stream stream)
{
    StreamReader streamReader = new StreamReader(stream);
    Load(streamReader);
}
/// <summary>
/// Loads a mixed code document from a stream.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
public void Load(Stream stream, bool detectEncodingFromByteOrderMarks)
{
    StreamReader streamReader = new StreamReader(stream, detectEncodingFromByteOrderMarks);
    Load(streamReader);
}
/// <summary>
/// Loads a mixed code document from a stream.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="encoding">The character encoding to use.</param>
public void Load(Stream stream, Encoding encoding)
{
    StreamReader streamReader = new StreamReader(stream, encoding);
    Load(streamReader);
}
/// <summary>
/// Loads a mixed code document from a stream.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
{
    StreamReader streamReader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks);
    Load(streamReader);
}
/// <summary>
/// Loads a mixed code document from a stream.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
/// <param name="buffersize">The minimum buffer size.</param>
public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
{
    StreamReader streamReader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks, buffersize);
    Load(streamReader);
}
/// <summary>
/// Loads a mixed code document from a file.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
public void Load(string path)
{
    // This method owns the reader it opens over the file, so dispose it even when
    // loading throws; otherwise the file handle stays open until finalization.
    // Disposing a reader that was already closed is a no-op.
    using (StreamReader fileReader = new StreamReader(path))
    {
        Load(fileReader);
    }
}
/// <summary>
/// Loads a mixed code document from a file.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
public void Load(string path, bool detectEncodingFromByteOrderMarks)
{
    // This method owns the reader it opens over the file, so dispose it even when
    // loading throws; otherwise the file handle stays open until finalization.
    // Disposing a reader that was already closed is a no-op.
    using (StreamReader fileReader = new StreamReader(path, detectEncodingFromByteOrderMarks))
    {
        Load(fileReader);
    }
}
/// <summary>
/// Loads a mixed code document from a file.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
/// <param name="encoding">The character encoding to use.</param>
public void Load(string path, Encoding encoding)
{
    // This method owns the reader it opens over the file, so dispose it even when
    // loading throws; otherwise the file handle stays open until finalization.
    // Disposing a reader that was already closed is a no-op.
    using (StreamReader fileReader = new StreamReader(path, encoding))
    {
        Load(fileReader);
    }
}
/// <summary>
/// Loads a mixed code document from a file.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
{
    // This method owns the reader it opens over the file, so dispose it even when
    // loading throws; otherwise the file handle stays open until finalization.
    // Disposing a reader that was already closed is a no-op.
    using (StreamReader fileReader = new StreamReader(path, encoding, detectEncodingFromByteOrderMarks))
    {
        Load(fileReader);
    }
}
/// <summary>
/// Loads a mixed code document from a file.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
/// <param name="buffersize">The minimum buffer size.</param>
public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
{
    // This method owns the reader it opens over the file, so dispose it even when
    // loading throws; otherwise the file handle stays open until finalization.
    // Disposing a reader that was already closed is a no-op.
    using (StreamReader fileReader = new StreamReader(path, encoding, detectEncodingFromByteOrderMarks, buffersize))
    {
        Load(fileReader);
    }
}
254 /// Loads the mixed code document from the specified TextReader.
256 /// <param name="reader">The TextReader used to feed the HTML data into the document.</param>
// Core loader: every other Load overload and LoadHtml forwards to this method.
// NOTE(review): statements between and after the visible ones are missing from this excerpt
// (e.g. whatever closes the reader and starts parsing) - confirm against the full file.
257 public void Load(TextReader reader
)
// Drop the code and text fragment lists left over from a previous load.
// NOTE(review): _fragments is not cleared alongside them in the visible lines - confirm that
// reloading a document does not accumulate stale fragments.
259 _codefragments
.Clear();
260 _textfragments
.Clear();
262 // all pseudo constructors get down to this one
// Only a StreamReader exposes the encoding it used; for any other TextReader `sr` is null.
263 StreamReader sr
= reader
as StreamReader
;
// NOTE(review): LoadHtml passes a StringReader, so `sr` can be null here; the null guard is not
// visible in this excerpt - confirm it exists before this dereference.
266 _streamencoding
= sr
.CurrentEncoding
;
// Read the whole input into memory; parsing works on this string.
269 _text
= reader
.ReadToEnd();
/// <summary>
/// Loads a mixed document from a text.
/// </summary>
/// <param name="html">The text to load.</param>
public void LoadHtml(string html)
{
    StringReader textSource = new StringReader(html);
    Load(textSource);
}
284 /// Saves the mixed document to the specified stream.
286 /// <param name="outStream">The stream to which you want to save.</param>
// Wraps outStream in a StreamWriter whose encoding comes from GetOutEncoding().
// NOTE(review): the statement that writes through `sw` is not visible in this excerpt - confirm
// the writer is flushed before this method returns, since it is never disposed here.
287 public void Save(Stream outStream
)
289 StreamWriter sw
= new StreamWriter(outStream
, GetOutEncoding());
294 /// Saves the mixed document to the specified stream.
296 /// <param name="outStream">The stream to which you want to save.</param>
297 /// <param name="encoding">The character encoding to use.</param>
// Wraps outStream in a StreamWriter that uses the caller-supplied encoding.
// NOTE(review): the statement that writes through `sw` is not visible in this excerpt - confirm
// the writer is flushed before this method returns, since it is never disposed here.
298 public void Save(Stream outStream
, Encoding encoding
)
300 StreamWriter sw
= new StreamWriter(outStream
, encoding
);
305 /// Saves the mixed document to the specified file.
307 /// <param name="filename">The location of the file where you want to save the document.</param>
// Opens filename for writing (append = false, so an existing file is overwritten) using the
// encoding from GetOutEncoding().
// NOTE(review): nothing in the visible lines disposes `sw`; if it is never closed the file handle
// stays open until finalization - confirm, and consider a using block around the writer.
308 public void Save(string filename
)
310 StreamWriter sw
= new StreamWriter(filename
, false, GetOutEncoding());
315 /// Saves the mixed document to the specified file.
317 /// <param name="filename">The location of the file where you want to save the document.</param>
318 /// <param name="encoding">The character encoding to use.</param>
// Opens filename for writing (append = false, so an existing file is overwritten) using the
// caller-supplied encoding.
// NOTE(review): nothing in the visible lines disposes `sw`; if it is never closed the file handle
// stays open until finalization - confirm, and consider a using block around the writer.
319 public void Save(string filename
, Encoding encoding
)
321 StreamWriter sw
= new StreamWriter(filename
, false, encoding
);
/// <summary>
/// Saves the mixed document to the specified StreamWriter.
/// </summary>
/// <param name="writer">The StreamWriter to which you want to save.</param>
public void Save(StreamWriter writer)
{
    // Widen to TextWriter so the call binds to the TextWriter overload rather than recursing.
    TextWriter target = writer;
    Save(target);
}
335 /// Saves the mixed document to the specified TextWriter.
337 /// <param name="writer">The TextWriter to which you want to save.</param>
338 public void Save(TextWriter writer
)
345 #region Internal Methods
/// <summary>
/// Creates a fragment of the requested kind, constructed with this document.
/// </summary>
/// <param name="type">The kind of fragment to create.</param>
/// <returns>A new text or code fragment.</returns>
/// <exception cref="NotSupportedException">The fragment type is neither Text nor Code.</exception>
internal MixedCodeDocumentFragment CreateFragment(MixedCodeDocumentFragmentType type)
{
    if (type == MixedCodeDocumentFragmentType.Text)
    {
        return new MixedCodeDocumentTextFragment(this);
    }

    if (type == MixedCodeDocumentFragmentType.Code)
    {
        return new MixedCodeDocumentCodeFragment(this);
    }

    throw new NotSupportedException();
}
/// <summary>
/// Picks the encoding used when saving: the encoding captured while loading,
/// or <see cref="Encoding.Default"/> when none was captured.
/// </summary>
/// <returns>The encoding to write the document with.</returns>
internal Encoding GetOutEncoding()
{
    return _streamencoding ?? Encoding.Default;
}
371 #region Private Methods
373 private void IncrementPosition()
// Parser main loop: splits _text into alternating text and code fragments by scanning for
// TokenCodeStart / TokenCodeEnd.
// NOTE(review): the method header, the per-iteration position advance, the switch header and the
// statements following each fragment creation are not visible in this excerpt - confirm against
// the full file before changing anything here; the logic depends on exact statement order.
// Parsing starts in the Text state with a fresh text fragment.
387 _state
= ParseState
.Text
;
389 _currentfragment
= CreateFragment(MixedCodeDocumentFragmentType
.Text
);
// Scan until the index reaches the end of the loaded text.
391 while (_index
< _text
.Length
)
398 case ParseState
.Text
:
// NOTE(review): this guard is stricter than the Substring below needs (which only requires
// _index - 1 + token length <= _text.Length), so a start token in the last few characters of the
// text is never matched - confirm this is intended.
399 if (_index
+ TokenCodeStart
.Length
< _text
.Length
)
// The candidate token starts one character before _index.
401 if (_text
.Substring(_index
- 1, TokenCodeStart
.Length
) == TokenCodeStart
)
// Code block begins: switch state, close the text fragment just before the token, and start a
// code fragment.
403 _state
= ParseState
.Code
;
404 _currentfragment
.Length
= _index
- 1 - _currentfragment
.Index
;
405 _currentfragment
= CreateFragment(MixedCodeDocumentFragmentType
.Code
);
412 case ParseState
.Code
:
// NOTE(review): same strict guard as in the Text state - an end token in the last few characters
// of the text is never matched, leaving the final fragment in the Code state; confirm intent.
413 if (_index
+ TokenCodeEnd
.Length
< _text
.Length
)
415 if (_text
.Substring(_index
- 1, TokenCodeEnd
.Length
) == TokenCodeEnd
)
// Code block ends: switch back to Text and set the code fragment's length from the current index
// plus the end token's length.
417 _state
= ParseState
.Text
;
418 _currentfragment
.Length
= _index
+ TokenCodeEnd
.Length
- _currentfragment
.Index
;
// Skip past the end token, keeping the line position in step with the index.
419 _index
+= TokenCodeEnd
.Length
;
420 _lineposition
+= TokenCodeEnd
.Length
;
421 _currentfragment
= CreateFragment(MixedCodeDocumentFragmentType
.Text
);
// After the loop: the last fragment extends from its start index to the final index.
430 _currentfragment
.Length
= _index
- _currentfragment
.Index
;
/// <summary>
/// Stamps the current fragment with the parser's current line, line position and
/// start index (one character before the parser index), and resets its length to zero.
/// </summary>
private void SetPosition()
{
    MixedCodeDocumentFragment fragment = _currentfragment;

    fragment.Line = _line;
    fragment._lineposition = _lineposition;
    fragment.Index = _index - 1;
    fragment.Length = 0;
}
443 #region Nested type: ParseState
445 private enum ParseState