Backed out changeset 4191b252db9b (bug 1886734) for causing build bustages @netwerk...
[gecko.git] / netwerk / streamconv / converters / mozTXTToHTMLConv.h
blobdbae8edb0f083ccdebc0dba1e9f5b15fe5b7617e
1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 /**
7 Description: Currently only functions to enhance plain text with HTML tags.
8 See mozITXTToHTMLConv. Stream conversion is defunct.
9 */
11 #ifndef _mozTXTToHTMLConv_h__
12 #define _mozTXTToHTMLConv_h__
14 #include "mozITXTToHTMLConv.h"
15 #include "nsIThreadRetargetableStreamListener.h"
16 #include "nsString.h"
17 #include "nsCOMPtr.h"
19 class nsIIOService;
21 class mozTXTToHTMLConv : public mozITXTToHTMLConv {
22 virtual ~mozTXTToHTMLConv() = default;
24 //////////////////////////////////////////////////////////
25 public:
26 //////////////////////////////////////////////////////////
28 mozTXTToHTMLConv() = default;
29 NS_DECL_ISUPPORTS
31 NS_DECL_MOZITXTTOHTMLCONV
32 NS_DECL_NSIREQUESTOBSERVER
33 NS_DECL_NSISTREAMLISTENER
34 NS_DECL_NSITHREADRETARGETABLESTREAMLISTENER
35 NS_DECL_NSISTREAMCONVERTER
37 /**
38 see mozITXTToHTMLConv::CiteLevelTXT
40 int32_t CiteLevelTXT(const char16_t* line, uint32_t& logLineStart);
42 //////////////////////////////////////////////////////////
43 protected:
44 //////////////////////////////////////////////////////////
45 nsCOMPtr<nsIIOService>
46 mIOService; // for performance reasons, cache the netwerk service...
47 /**
48 Completes<ul>
49 <li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org"
50 <li>Case 2: http: "www.mozilla.org" -> "http://www.mozilla.org"
51 <li>Case 3: ftp: "ftp.mozilla.org" -> "ftp://ftp.mozilla.org"
52 </ul>
53 It does not check whether the resulting URL is valid.
54 @param text (in): abbreviated URL
55 @param pos (in): position of "@" (case 1) or first "." (case 2 and 3)
56 @return Completed URL at success and empty string at failure
58 void CompleteAbbreviatedURL(const char16_t* aInString, int32_t aInLength,
59 const uint32_t pos, nsString& aOutString);
61 //////////////////////////////////////////////////////////
62 private:
63 //////////////////////////////////////////////////////////
65 enum LIMTYPE {
66 LT_IGNORE, // limitation not checked
67 LT_DELIMITER, // not alphanumeric and not rep[0]. End of text is also ok.
68 LT_ALPHA, // alpha char
69 LT_DIGIT
72 /**
73 @param text (in): the string to search through.<p>
74 If before = LT_IGNORE,<br>
75 rep is compared starting at the 1st char of text (text[0]),<br>
76 else starting at the 2nd char of text (text[1]).
77 Chars after "after"-delimiter are ignored.
78 @param rep (in): the string to look for
79 @param aRepLen (in): the number of bytes in the string to look for
80 @param before (in): limitation before rep
81 @param after (in): limitation after rep
82 @return true, if rep is found and limitation spec is met or rep is empty
84 bool ItMatchesDelimited(const char16_t* aInString, int32_t aInLength,
85 const char16_t* rep, int32_t aRepLen, LIMTYPE before,
86 LIMTYPE after);
88 /**
89 @param see ItMatchesDelimited
90 @return Number of ItMatchesDelimited matches in text
92 uint32_t NumberOfMatches(const char16_t* aInString, int32_t aInStringLength,
93 const char16_t* rep, int32_t aRepLen, LIMTYPE before,
94 LIMTYPE after);
96 /**
97 Currently only changes "<", ">" and "&". All others stay as they are.<p>
98 "Char" in function name to avoid side effects with nsString(ch)
99 constructors.
100 @param ch (in)
101 @param aStringToAppendto (out) - the string to append the escaped
102 string to.
103 @param inAttribute (in) - will escape quotes, too (which is
104 only needed for attribute values)
106 void EscapeChar(const char16_t ch, nsAString& aStringToAppendto,
107 bool inAttribute);
110 See EscapeChar. Escapes the string in place.
112 void EscapeStr(nsString& aInString, bool inAttribute);
115 Currently only reverts "<", ">" and "&". All others stay as they are.<p>
116 @param aInString (in) HTML string
117 @param aStartPos (in) start index into the buffer
118 @param aLength (in) length of the buffer
119 @param aOutString (out) unescaped buffer
121 void UnescapeStr(const char16_t* aInString, int32_t aStartPos,
122 int32_t aLength, nsString& aOutString);
125 <em>Note</em>: I use different strategies to pass context between the
126 functions (full text and pos vs. cut text and col0, glyphTextLen vs.
127 replaceBefore/-After). It makes some sense, but is hard to understand
128 (maintain) :-(.
132 <p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars
133 in text should be replaced by outputHTML.</p>
134 <p><em>Note:</em> This function should be able to process a URL on multiple
135 lines, but currently, ScanForURLs is called for every line, so it can't.</p>
136 @param text (in): includes possibly a URL
137 @param pos (in): position in text, where either ":", "." or "@" are found
138 @param whathasbeendone (in): What the calling ScanTXT did/has to do with the
139 (not-linkified) text, i.e. usually the "whattodo" parameter.
140 (Needed to calculate replaceBefore.) NOT what will be done with
141 the content of the link.
142 @param outputHTML (out): URL with HTML-a tag
143 @param replaceBefore (out): Number of chars of URL before pos
144 @param replaceAfter (out): Number of chars of URL after pos
145 @return URL found
147 bool FindURL(const char16_t* aInString, int32_t aInLength, const uint32_t pos,
148 const uint32_t whathasbeendone, nsString& outputHTML,
149 int32_t& replaceBefore, int32_t& replaceAfter);
151 enum modetype {
152 unknown,
153 RFC1738, /* Check, if RFC1738, APPENDIX compliant,
154 like "<URL:http://www.mozilla.org>". */
155 RFC2396E, /* RFC2396, APPENDIX E allows anglebrackets (like
156 "<http://www.mozilla.org>") (without "URL:") or
157 quotation marks(like ""http://www.mozilla.org"").
158 Also allow email addresses without scheme,
159 e.g. "<mozilla@bucksch.org>" */
160 freetext, /* assume heading scheme
161 with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:"
162 (see RFC2396, Section 3.1).
163 Certain characters (see code) or any whitespace
164 (including linebreaks) end the URL.
165 Certain other (punctuation) characters (see code)
166 at the end are stripped off. */
167 abbreviated /* Similar to freetext, but without scheme, e.g.
168 "www.mozilla.org", "ftp.mozilla.org" and
169 "mozilla@bucksch.org". */
170 /* RFC1738 and RFC2396E type URLs may use multiple lines,
171 whitespace is stripped. Special characters like ")" stay intact.*/
175 * @param text (in), pos (in): see FindURL
176 * @param check (in): Start must conform to this mode
177 * @param start (out): Position in text, where URL (including brackets or
178 * similar) starts
179 * @return |check|-conform start has been found
181 bool FindURLStart(const char16_t* aInString, int32_t aInLength,
182 const uint32_t pos, const modetype check, uint32_t& start);
185 * @param text (in), pos (in): see FindURL
186 * @param check (in): End must conform to this mode
187 * @param start (in): see FindURLStart
188 * @param end (out): Similar to |start| param of FindURLStart
189 * @return |check|-conform end has been found
191 bool FindURLEnd(const char16_t* aInString, int32_t aInStringLength,
192 const uint32_t pos, const modetype check,
193 const uint32_t start, uint32_t& end);
196 * @param text (in), pos (in), whathasbeendone (in): see FindURL
197 * @param check (in): Current mode
198 * @param start (in), end (in): see FindURLEnd
199 * @param txtURL (out): Guessed (raw) URL.
200 * Without whitespace, but not completed.
201 * @param desc (out): Link as shown to the user, but already escaped.
202 * Should be placed between the <a> and </a> tags.
203 * @param replaceBefore(out), replaceAfter (out): see FindURL
205 void CalculateURLBoundaries(const char16_t* aInString,
206 int32_t aInStringLength, const uint32_t pos,
207 const uint32_t whathasbeendone,
208 const modetype check, const uint32_t start,
209 const uint32_t end, nsString& txtURL,
210 nsString& desc, int32_t& replaceBefore,
211 int32_t& replaceAfter);
214 * @param txtURL (in), desc (in): see CalculateURLBoundaries
215 * @param outputHTML (out): see FindURL
216 * @return A valid URL could be found (and creation of HTML successful)
218 bool CheckURLAndCreateHTML(const nsString& txtURL, const nsString& desc,
219 const modetype mode, nsString& outputHTML);
222 @param text (in): line of text possibly with tagTXT.<p>
223 if col0 is true,
224 starting with tagTXT<br>
225 else
226 starting one char before tagTXT
227 @param col0 (in): tagTXT is on the beginning of the line (or paragraph).
228 openTags must be 0 then.
229 @param tagTXT (in): Tag in plaintext to search for, e.g. "*"
230 @param aTagTxtLen (in): length of tagTXT.
231 @param tagHTML (in): HTML-Tag to replace tagTXT with,
232 without "<" and ">", e.g. "strong"
233 @param attributeHTML (in): HTML-attribute to add to opening tagHTML,
234 e.g. "class=txt_star"
235 @param aOutString: string to APPEND the converted html into
236 @param openTags (in/out): Number of currently open tags of type tagHTML
237 @return Conversion succeeded
239 bool StructPhraseHit(const char16_t* aInString, int32_t aInStringLength,
240 bool col0, const char16_t* tagTXT, int32_t aTagTxtLen,
241 const char* tagHTML, const char* attributeHTML,
242 nsAString& aOutString, uint32_t& openTags);
245 @param text (in), col0 (in): see GlyphHit
246 @param tagTXT (in): Smiley, see also StructPhraseHit
247 @param imageName (in): the basename of the file that contains the image for
248 this smiley
249 @param outputHTML (out): new string containing the html for the smiley
250 @param glyphTextLen (out): see GlyphHit
252 bool SmilyHit(const char16_t* aInString, int32_t aLength, bool col0,
253 const char* tagTXT, const nsString& imageName,
254 nsString& outputHTML, int32_t& glyphTextLen);
257 Checks, if we can replace some chars at the start of line with prettier HTML
258 code.<p>
259 If success is reported, replace the first glyphTextLen chars with outputHTML
261 @param text (in): line of text possibly with Glyph.<p>
262 If col0 is true,
263 starting with Glyph <br><!-- (br not part of text) -->
264 else
265 starting one char before Glyph
266 @param col0 (in): text starts at the beginning of the line (or paragraph)
267 @param aOutputString (out): APPENDS html for the glyph to this string
268 @param glyphTextLen (out): Length of original text to replace
269 @return see StructPhraseHit
271 bool GlyphHit(const char16_t* aInString, int32_t aInLength, bool col0,
272 nsAString& aOutputString, int32_t& glyphTextLen);
275 Check if a given url should be linkified.
276 @param aURL (in): url to be checked on.
278 bool ShouldLinkify(const nsCString& aURL);
281 // It's said that Win32 and Mac don't like static const members
282 const int32_t mozTXTToHTMLConv_lastMode = 4;
283 // Needed (only) by mozTXTToHTMLConv::FindURL
284 const int32_t mozTXTToHTMLConv_numberOfModes = 4; // ditto; unknown not counted
286 #endif