Bug 1814798 - pt 2. Add a PHCManager component to control PHC r=glandium,emilio
[gecko.git] / netwerk / streamconv / converters / mozTXTToHTMLConv.h
bloba9b888845026ec3e8329d2afd4324f46a4c7828a
1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 /**
7 Description: Currently only functions to enhance plain text with HTML tags.
8 See mozITXTToHTMLConv. Stream conversion is defunct.
9 */
11 #ifndef _mozTXTToHTMLConv_h__
12 #define _mozTXTToHTMLConv_h__
14 #include "mozITXTToHTMLConv.h"
15 #include "nsString.h"
16 #include "nsCOMPtr.h"
18 class nsIIOService;
20 class mozTXTToHTMLConv : public mozITXTToHTMLConv {
21 virtual ~mozTXTToHTMLConv() = default;
23 //////////////////////////////////////////////////////////
24 public:
25 //////////////////////////////////////////////////////////
27 mozTXTToHTMLConv() = default;
28 NS_DECL_ISUPPORTS
30 NS_DECL_MOZITXTTOHTMLCONV
31 NS_DECL_NSIREQUESTOBSERVER
32 NS_DECL_NSISTREAMLISTENER
33 NS_DECL_NSISTREAMCONVERTER
35 /**
36 see mozITXTToHTMLConv::CiteLevelTXT
38 int32_t CiteLevelTXT(const char16_t* line, uint32_t& logLineStart);
40 //////////////////////////////////////////////////////////
41 protected:
42 //////////////////////////////////////////////////////////
43 nsCOMPtr<nsIIOService>
44 mIOService; // for performance reasons, cache the netwerk service...
45 /**
46 Completes<ul>
47 <li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org"
48 <li>Case 2: http: "www.mozilla.org" -> "http://www.mozilla.org"
49 <li>Case 3: ftp: "ftp.mozilla.org" -> "ftp://ftp.mozilla.org"
50 </ul>
51 It does not check whether the resulting URL is valid.
52 @param aInString (in): abbreviated URL
53 @param pos (in): position of "@" (case 1) or first "." (cases 2 and 3)
54 @param aOutString (out): completed URL on success, empty string on failure
56 void CompleteAbbreviatedURL(const char16_t* aInString, int32_t aInLength,
57 const uint32_t pos, nsString& aOutString);
59 //////////////////////////////////////////////////////////
60 private:
61 //////////////////////////////////////////////////////////
63 enum LIMTYPE {
64 LT_IGNORE, // limitation not checked
65 LT_DELIMITER, // not alphanumeric and not rep[0]. End of text is also ok.
66 LT_ALPHA, // alpha char
67 LT_DIGIT
70 /**
71 @param text (in): the string to search through.<p>
72 If before = LT_IGNORE,<br>
73 rep is compared starting at the first char of text (text[0]),<br>
74 else starting at the second char of text (text[1]).
75 Chars after the "after"-delimiter are ignored.
76 @param rep (in): the string to look for
77 @param aRepLen (in): the number of bytes in the string to look for
78 @param before (in): limitation before rep
79 @param after (in): limitation after rep
80 @return true, if rep is found and limitation spec is met or rep is empty
82 bool ItMatchesDelimited(const char16_t* aInString, int32_t aInLength,
83 const char16_t* rep, int32_t aRepLen, LIMTYPE before,
84 LIMTYPE after);
86 /**
87 @param see ItMatchesDelimited
88 @return Number of ItMatchesDelimited in text
90 uint32_t NumberOfMatches(const char16_t* aInString, int32_t aInStringLength,
91 const char16_t* rep, int32_t aRepLen, LIMTYPE before,
92 LIMTYPE after);
94 /**
95 Currently only changes "<", ">" and "&". All others stay as they are.<p>
96 "Char" in function name to avoid side effects with nsString(ch)
97 constructors.
98 @param ch (in)
99 @param aStringToAppendto (out) - the string to append the escaped
100 string to.
101 @param inAttribute (in) - will escape quotes, too (which is
102 only needed for attribute values)
104 void EscapeChar(const char16_t ch, nsAString& aStringToAppendto,
105 bool inAttribute);
108 See EscapeChar. Escapes the string in place.
110 void EscapeStr(nsString& aInString, bool inAttribute);
113 Currently only reverts "<", ">" and "&". All others stay as they are.<p>
114 @param aInString (in) HTML string
115 @param aStartPos (in) start index into the buffer
116 @param aLength (in) length of the buffer
117 @param aOutString (out) unescaped buffer
119 void UnescapeStr(const char16_t* aInString, int32_t aStartPos,
120 int32_t aLength, nsString& aOutString);
123 <em>Note</em>: I use different strategies to pass context between the
124 functions (full text and pos vs. cut text and col0, glyphTextLen vs.
125 replaceBefore/-After). It makes some sense, but is hard to understand
126 (maintain) :-(.
130 <p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars
131 in text should be replaced by outputHTML.</p>
132 <p><em>Note:</em> This function should be able to process a URL on multiple
133 lines, but currently, ScanForURLs is called for every line, so it can't.</p>
134 @param text (in): includes possibly a URL
135 @param pos (in): position in text, where either ":", "." or "@" are found
136 @param whathasbeendone (in): What the calling ScanTXT did/has to do with the
137 (not-linkified) text, i.e. usually the "whattodo" parameter.
138 (Needed to calculate replaceBefore.) NOT what will be done with
139 the content of the link.
140 @param outputHTML (out): URL with HTML-a tag
141 @param replaceBefore (out): Number of chars of URL before pos
142 @param replaceAfter (out): Number of chars of URL after pos
143 @return URL found
145 bool FindURL(const char16_t* aInString, int32_t aInLength, const uint32_t pos,
146 const uint32_t whathasbeendone, nsString& outputHTML,
147 int32_t& replaceBefore, int32_t& replaceAfter);
149 enum modetype {
150 unknown,
151 RFC1738, /* Check, if RFC1738, APPENDIX compliant,
152 like "<URL:http://www.mozilla.org>". */
153 RFC2396E, /* RFC2396, APPENDIX E allows anglebrackets (like
154 "<http://www.mozilla.org>") (without "URL:") or
155 quotation marks(like ""http://www.mozilla.org"").
156 Also allow email addresses without scheme,
157 e.g. "<mozilla@bucksch.org>" */
158 freetext, /* assume heading scheme
159 with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:"
160 (see RFC2396, Section 3.1).
161 Certain characters (see code) or any whitespace
162 (including linebreaks) end the URL.
163 Certain other (punctuation) characters (see code)
164 at the end are stripped off. */
165 abbreviated /* Similar to freetext, but without scheme, e.g.
166 "www.mozilla.org", "ftp.mozilla.org" and
167 "mozilla@bucksch.org". */
168 /* RFC1738 and RFC2396E type URLs may use multiple lines,
169 whitespace is stripped. Special characters like ")" stay intact.*/
173 * @param text (in), pos (in): see FindURL
174 * @param check (in): Start must conform to this mode
175 * @param start (out): Position in text, where URL (including brackets or
176 * similar) starts
177 * @return |check|-conform start has been found
179 bool FindURLStart(const char16_t* aInString, int32_t aInLength,
180 const uint32_t pos, const modetype check, uint32_t& start);
183 * @param text (in), pos (in): see FindURL
184 * @param check (in): End must conform to this mode
185 * @param start (in): see FindURLStart
186 * @param end (out): Similar to |start| param of FindURLStart
187 * @return |check|-conform end has been found
189 bool FindURLEnd(const char16_t* aInString, int32_t aInStringLength,
190 const uint32_t pos, const modetype check,
191 const uint32_t start, uint32_t& end);
194 * @param text (in), pos (in), whathasbeendone (in): see FindURL
195 * @param check (in): Current mode
196 * @param start (in), end (in): see FindURLEnd
197 * @param txtURL (out): Guessed (raw) URL.
198 * Without whitespace, but not completed.
199 * @param desc (out): Link as shown to the user, but already escaped.
200 * Should be placed between the <a> and </a> tags.
201 * @param replaceBefore(out), replaceAfter (out): see FindURL
203 void CalculateURLBoundaries(const char16_t* aInString,
204 int32_t aInStringLength, const uint32_t pos,
205 const uint32_t whathasbeendone,
206 const modetype check, const uint32_t start,
207 const uint32_t end, nsString& txtURL,
208 nsString& desc, int32_t& replaceBefore,
209 int32_t& replaceAfter);
212 * @param txtURL (in), desc (in): see CalculateURLBoundaries
213 * @param outputHTML (out): see FindURL
214 * @return A valid URL could be found (and creation of HTML successful)
216 bool CheckURLAndCreateHTML(const nsString& txtURL, const nsString& desc,
217 const modetype mode, nsString& outputHTML);
220 @param text (in): line of text possibly with tagTXT.<p>
221 if col0 is true,
222 starting with tagTXT<br>
223 else
224 starting one char before tagTXT
225 @param col0 (in): tagTXT is at the beginning of the line (or paragraph).
226 openTags must be 0 then.
227 @param tagTXT (in): Tag in plaintext to search for, e.g. "*"
228 @param aTagTxtLen (in): length of tagTXT.
229 @param tagHTML (in): HTML-Tag to replace tagTXT with,
230 without "<" and ">", e.g. "strong"
231 @param attributeHTML (in): HTML-attribute to add to opening tagHTML,
232 e.g. "class=txt_star"
233 @param aOutString (out): string to APPEND the converted html into
234 @param openTags (in/out): Number of currently open tags of type tagHTML
235 @return Conversion succeeded
237 bool StructPhraseHit(const char16_t* aInString, int32_t aInStringLength,
238 bool col0, const char16_t* tagTXT, int32_t aTagTxtLen,
239 const char* tagHTML, const char* attributeHTML,
240 nsAString& aOutString, uint32_t& openTags);
243 @param text (in), col0 (in): see GlyphHit
244 @param tagTXT (in): Smiley, see also StructPhraseHit
245 @param imageName (in): the basename of the file that contains the image for
246 this smiley
247 @param outputHTML (out): new string containing the html for the smiley
248 @param glyphTextLen (out): see GlyphHit
250 bool SmilyHit(const char16_t* aInString, int32_t aLength, bool col0,
251 const char* tagTXT, const nsString& imageName,
252 nsString& outputHTML, int32_t& glyphTextLen);
255 Checks, if we can replace some chars at the start of line with prettier HTML
256 code.<p>
257 If success is reported, replace the first glyphTextLen chars with outputHTML
259 @param text (in): line of text possibly with Glyph.<p>
260 If col0 is true,
261 starting with Glyph <br><!-- (br not part of text) -->
262 else
263 starting one char before Glyph
264 @param col0 (in): text starts at the beginning of the line (or paragraph)
265 @param aOutputString (out): APPENDS html for the glyph to this string
266 @param glyphTextLen (out): Length of original text to replace
267 @return see StructPhraseHit
269 bool GlyphHit(const char16_t* aInString, int32_t aInLength, bool col0,
270 nsAString& aOutputString, int32_t& glyphTextLen);
273 Check if a given url should be linkified.
274 @param aURL (in): url to be checked on.
276 bool ShouldLinkify(const nsCString& aURL);
279 // It's said that Win32 and Mac don't like static const members.
280 const int32_t mozTXTToHTMLConv_lastMode = 4; // numerically equal to modetype `abbreviated`, the last enumerator
281 // Needed (only) by mozTXTToHTMLConv::FindURL
282 const int32_t mozTXTToHTMLConv_numberOfModes = 4; // ditto; unknown not counted (RFC1738, RFC2396E, freetext, abbreviated)
284 #endif