Bug 1869043 assert that graph set access is main thread only r=padenot
[gecko.git] / netwerk / mime / nsMIMEHeaderParamImpl.cpp
blob22cf71728b66ddaab69007f1ab8e10780a9ef84a
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set sw=2 ts=8 et tw=80 : */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include <string.h>
8 #include "prprf.h"
9 #include "prmem.h"
10 #include "plbase64.h"
11 #include "nsCRT.h"
12 #include "nsTArray.h"
13 #include "nsEscape.h"
14 #include "nsMIMEHeaderParamImpl.h"
15 #include "nsNativeCharsetUtils.h"
16 #include "mozilla/Encoding.h"
17 #include "mozilla/TextUtils.h"
18 #include "mozilla/Utf8.h"
20 using mozilla::Encoding;
21 using mozilla::IsAscii;
22 using mozilla::IsUtf8;
24 // static functions declared below are moved from mailnews/mime/src/comi18n.cpp
26 static char* DecodeQ(const char*, uint32_t);
27 static bool Is7bitNonAsciiString(const char*, uint32_t);
28 static void CopyRawHeader(const char*, uint32_t, const nsACString&,
29 nsACString&);
30 static nsresult DecodeRFC2047Str(const char*, const nsACString&, bool,
31 nsACString&);
32 static nsresult internalDecodeParameter(const nsACString&, const nsACString&,
33 const nsACString&, bool, bool,
34 nsACString&);
36 static nsresult ToUTF8(const nsACString& aString, const nsACString& aCharset,
37 bool aAllowSubstitution, nsACString& aResult) {
38 if (aCharset.IsEmpty()) {
39 return NS_ERROR_INVALID_ARG;
42 const auto* encoding = Encoding::ForLabelNoReplacement(aCharset);
43 if (!encoding) {
44 return NS_ERROR_UCONV_NOCONV;
46 if (aAllowSubstitution) {
47 nsresult rv = encoding->DecodeWithoutBOMHandling(aString, aResult);
48 if (NS_SUCCEEDED(rv)) {
49 return NS_OK;
51 return rv;
53 return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aString,
54 aResult);
57 static nsresult ConvertStringToUTF8(const nsACString& aString,
58 const nsACString& aCharset, bool aSkipCheck,
59 bool aAllowSubstitution,
60 nsACString& aUTF8String) {
61 // return if ASCII only or valid UTF-8 providing that the ASCII/UTF-8
62 // check is requested. It may not be asked for if a caller suspects
63 // that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or
64 // it's in a charset other than UTF-8 that can be mistaken for UTF-8.
65 if (!aSkipCheck && (IsAscii(aString) || IsUtf8(aString))) {
66 aUTF8String = aString;
67 return NS_OK;
70 aUTF8String.Truncate();
72 nsresult rv = ToUTF8(aString, aCharset, aAllowSubstitution, aUTF8String);
74 // additional protection for cases where check is skipped and the input
75 // is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch
76 // was wrong.) We don't check ASCIIness assuming there's no charset
77 // incompatible with ASCII (we don't support EBCDIC).
78 if (aSkipCheck && NS_FAILED(rv) && IsUtf8(aString)) {
79 aUTF8String = aString;
80 return NS_OK;
83 return rv;
// True when the charset label |cset| starts (case-insensitively) with the
// name of a 7bit non-ASCII charset family: ISO-2022, HZ-GB or UTF-7.
// XXX UTF-7 is very unlikely to appear in a message header, but in theory
// it is possible.
#define IS_7BIT_NON_ASCII_CHARSET(cset)               \
  (nsCRT::strncasecmp((cset), "ISO-2022", 8) == 0 || \
   nsCRT::strncasecmp((cset), "HZ-GB", 5) == 0 ||    \
   nsCRT::strncasecmp((cset), "UTF-7", 5) == 0)
93 NS_IMPL_ISUPPORTS(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam)
95 NS_IMETHODIMP
96 nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal,
97 const char* aParamName,
98 const nsACString& aFallbackCharset,
99 bool aTryLocaleCharset, char** aLang,
100 nsAString& aResult) {
101 return DoGetParameter(aHeaderVal, aParamName, MIME_FIELD_ENCODING,
102 aFallbackCharset, aTryLocaleCharset, aLang, aResult);
105 NS_IMETHODIMP
106 nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal,
107 const char* aParamName,
108 const nsACString& aFallbackCharset,
109 bool aTryLocaleCharset, char** aLang,
110 nsAString& aResult) {
111 return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING,
112 aFallbackCharset, aTryLocaleCharset, aLang, aResult);
115 /* static */
116 nsresult nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal,
117 const char* aParamName,
118 nsAString& aResult) {
119 return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING, ""_ns,
120 false, nullptr, aResult);
123 /* static */
124 // detects any non-null characters pass null
125 bool nsMIMEHeaderParamImpl::ContainsTrailingCharPastNull(
126 const nsACString& aVal) {
127 nsACString::const_iterator first;
128 aVal.BeginReading(first);
129 nsACString::const_iterator end;
130 aVal.EndReading(end);
132 if (FindCharInReadable(L'\0', first, end)) {
133 while (first != end) {
134 if (*first != '\0') {
135 // contains trailing characters past the null character
136 return true;
138 ++first;
141 return false;
144 // XXX : aTryLocaleCharset is not yet effective.
145 /* static */
146 nsresult nsMIMEHeaderParamImpl::DoGetParameter(
147 const nsACString& aHeaderVal, const char* aParamName,
148 ParamDecoding aDecoding, const nsACString& aFallbackCharset,
149 bool aTryLocaleCharset, char** aLang, nsAString& aResult) {
150 aResult.Truncate();
151 nsresult rv;
153 // get parameter (decode RFC 2231/5987 when applicable, as specified by
154 // aDecoding (5987 being a subset of 2231) and return charset.)
155 nsCString med;
156 nsCString charset;
157 rv = DoParameterInternal(aHeaderVal, aParamName, aDecoding,
158 getter_Copies(charset), aLang, getter_Copies(med));
159 if (NS_FAILED(rv)) return rv;
161 // convert to UTF-8 after charset conversion and RFC 2047 decoding
162 // if necessary.
164 nsAutoCString str1;
165 rv = internalDecodeParameter(med, charset, ""_ns, false,
166 // was aDecoding == MIME_FIELD_ENCODING
167 // see bug 875615
168 true, str1);
169 NS_ENSURE_SUCCESS(rv, rv);
171 if (!aFallbackCharset.IsEmpty()) {
172 const Encoding* encoding = Encoding::ForLabel(aFallbackCharset);
173 nsAutoCString str2;
174 if (NS_SUCCEEDED(ConvertStringToUTF8(str1, aFallbackCharset, false,
175 encoding != UTF_8_ENCODING, str2))) {
176 CopyUTF8toUTF16(str2, aResult);
177 return NS_OK;
181 if (IsUtf8(str1)) {
182 CopyUTF8toUTF16(str1, aResult);
183 return NS_OK;
186 if (aTryLocaleCharset && !NS_IsNativeUTF8()) {
187 return NS_CopyNativeToUnicode(str1, aResult);
190 CopyASCIItoUTF16(str1, aResult);
191 return NS_OK;
// Strips the backslash from every backslash-escaped character of a
// quoted-string. The string is rewritten in place and may become shorter.
// A backslash that is the very last character is kept as is.
void RemoveQuotedStringEscapes(char* src) {
  char* out = src;
  const char* in = src;

  while (*in) {
    if (*in == '\\' && in[1] != '\0') {
      // drop the backslash, keep the character it escapes
      ++in;
    }
    *out++ = *in++;
  }

  *out = '\0';
}
// true if the character is an ASCII hexadecimal digit (0-9, a-f, A-F)
bool IsHexDigit(char aChar) {
  return (aChar >= '0' && aChar <= '9') || (aChar >= 'a' && aChar <= 'f') ||
         (aChar >= 'A' && aChar <= 'F');
}

// Validates that the first |len| characters of aValue are syntactically
// valid with respect to %-escapes: every '%' must be followed by two hex
// digits that also lie within those |len| characters.
// Returns true when no malformed escape is found (including for len <= 0).
bool IsValidPercentEscaped(const char* aValue, int32_t len) {
  for (int32_t i = 0; i < len; i++) {
    if (aValue[i] != '%') {
      continue;
    }
    // An escape needs three characters inside the range. Checking the
    // remaining length first keeps the digit reads within [0, len) and
    // rejects escapes that are cut off by the end of the range.
    if (len - i < 3 || !IsHexDigit(aValue[i + 1]) ||
        !IsHexDigit(aValue[i + 2])) {
      return false;
    }
  }
  return true;
}
229 // Support for continuations (RFC 2231, Section 3)
231 // only a sane number supported
232 #define MAX_CONTINUATIONS 999
// One segment of a continued parameter value: a non-owning pointer to the
// segment's text plus the flags that tell how it has to be post-processed.
class Continuation {
 public:
  // An empty entry; nsTArray needs a default constructor.
  Continuation()
      : value(nullptr),
        length(0),
        needsPercentDecoding(false),
        wasQuotedString(false) {}

  Continuation(const char* aValue, uint32_t aLength, bool aNeedsPercentDecoding,
               bool aWasQuotedString)
      : value(aValue),
        length(aLength),
        needsPercentDecoding(aNeedsPercentDecoding),
        wasQuotedString(aWasQuotedString) {}

  ~Continuation() = default;

  const char* value;          // start of the segment text, nullptr if unset
  uint32_t length;            // number of characters in the segment
  bool needsPercentDecoding;  // segment text is %-escaped
  bool wasQuotedString;       // segment text came from a quoted-string
};
260 // combine segments into a single string, returning the allocated string
261 // (or nullptr) while emptying the list
262 char* combineContinuations(nsTArray<Continuation>& aArray) {
263 // Sanity check
264 if (aArray.Length() == 0) return nullptr;
266 // Get an upper bound for the length
267 uint32_t length = 0;
268 for (uint32_t i = 0; i < aArray.Length(); i++) {
269 length += aArray[i].length;
272 // Allocate
273 char* result = (char*)moz_xmalloc(length + 1);
275 // Concatenate
276 *result = '\0';
278 for (uint32_t i = 0; i < aArray.Length(); i++) {
279 Continuation cont = aArray[i];
280 if (!cont.value) break;
282 char* c = result + strlen(result);
283 strncat(result, cont.value, cont.length);
284 if (cont.needsPercentDecoding) {
285 nsUnescape(c);
287 if (cont.wasQuotedString) {
288 RemoveQuotedStringEscapes(c);
292 // return null if empty value
293 if (*result == '\0') {
294 free(result);
295 result = nullptr;
298 return result;
301 // add a continuation, return false on error if segment already has been seen
302 bool addContinuation(nsTArray<Continuation>& aArray, uint32_t aIndex,
303 const char* aValue, uint32_t aLength,
304 bool aNeedsPercentDecoding, bool aWasQuotedString) {
305 if (aIndex < aArray.Length() && aArray[aIndex].value) {
306 NS_WARNING("duplicate RC2231 continuation segment #\n");
307 return false;
310 if (aIndex > MAX_CONTINUATIONS) {
311 NS_WARNING("RC2231 continuation segment # exceeds limit\n");
312 return false;
315 if (aNeedsPercentDecoding && aWasQuotedString) {
316 NS_WARNING(
317 "RC2231 continuation segment can't use percent encoding and quoted "
318 "string form at the same time\n");
319 return false;
322 Continuation cont(aValue, aLength, aNeedsPercentDecoding, aWasQuotedString);
324 if (aArray.Length() <= aIndex) {
325 aArray.SetLength(aIndex + 1);
327 aArray[aIndex] = cont;
329 return true;
332 // parse a segment number; return -1 on error
333 int32_t parseSegmentNumber(const char* aValue, int32_t aLen) {
334 if (aLen < 1) {
335 NS_WARNING("segment number missing\n");
336 return -1;
339 if (aLen > 1 && aValue[0] == '0') {
340 NS_WARNING("leading '0' not allowed in segment number\n");
341 return -1;
344 int32_t segmentNumber = 0;
346 for (int32_t i = 0; i < aLen; i++) {
347 if (!(aValue[i] >= '0' && aValue[i] <= '9')) {
348 NS_WARNING("invalid characters in segment number\n");
349 return -1;
352 segmentNumber *= 10;
353 segmentNumber += aValue[i] - '0';
354 if (segmentNumber > MAX_CONTINUATIONS) {
355 NS_WARNING("Segment number exceeds sane size\n");
356 return -1;
360 return segmentNumber;
363 // validate a given octet sequence for compliance with the specified
364 // encoding
365 bool IsValidOctetSequenceForCharset(const nsACString& aCharset,
366 const char* aOctets) {
367 nsAutoCString tmpRaw;
368 tmpRaw.Assign(aOctets);
369 nsAutoCString tmpDecoded;
371 nsresult rv = ConvertStringToUTF8(tmpRaw, aCharset, false, false, tmpDecoded);
373 if (rv != NS_OK) {
374 // we can't decode; charset may be unsupported, or the octet sequence
375 // is broken (illegal or incomplete octet sequence contained)
376 NS_WARNING(
377 "RFC2231/5987 parameter value does not decode according to specified "
378 "charset\n");
379 return false;
382 return true;
385 // moved almost verbatim from mimehdrs.cpp
386 // char *
387 // MimeHeaders_get_parameter (const char *header_value, const char *parm_name,
388 // char **charset, char **language)
390 // The format of these header lines is
391 // <token> [ ';' <token> '=' <token-or-quoted-string> ]*
392 NS_IMETHODIMP
393 nsMIMEHeaderParamImpl::GetParameterInternal(const nsACString& aHeaderValue,
394 const char* aParamName,
395 char** aCharset, char** aLang,
396 char** aResult) {
397 return DoParameterInternal(aHeaderValue, aParamName, MIME_FIELD_ENCODING,
398 aCharset, aLang, aResult);
401 /* static */
402 nsresult nsMIMEHeaderParamImpl::DoParameterInternal(
403 const nsACString& aHeaderValue, const char* aParamName,
404 ParamDecoding aDecoding, char** aCharset, char** aLang, char** aResult) {
405 if (aHeaderValue.IsEmpty() || !aResult) {
406 return NS_ERROR_INVALID_ARG;
409 if (ContainsTrailingCharPastNull(aHeaderValue)) {
410 // See Bug 1784348
411 return NS_ERROR_INVALID_ARG;
414 const nsCString& flat = PromiseFlatCString(aHeaderValue);
415 const char* str = flat.get();
417 if (!*str) {
418 return NS_ERROR_INVALID_ARG;
421 *aResult = nullptr;
423 if (aCharset) *aCharset = nullptr;
424 if (aLang) *aLang = nullptr;
426 nsAutoCString charset;
428 // change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable
429 // them for HTTP header fields later on, see bug 776324
430 bool acceptContinuations = true;
432 // skip leading white space.
433 for (; *str && nsCRT::IsAsciiSpace(*str); ++str) {
436 const char* start = str;
438 // aParamName is empty. return the first (possibly) _unnamed_ 'parameter'
439 // For instance, return 'inline' in the following case:
440 // Content-Disposition: inline; filename=.....
441 if (!aParamName || !*aParamName) {
442 for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str) {
445 if (str == start) return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY;
447 *aResult = (char*)moz_xmemdup(start, (str - start) + 1);
448 (*aResult)[str - start] = '\0'; // null-terminate
449 return NS_OK;
452 /* Skip forward to first ';' */
453 for (; *str && *str != ';' && *str != ','; ++str) {
456 if (*str) str++;
457 /* Skip over following whitespace */
458 for (; *str && nsCRT::IsAsciiSpace(*str); ++str) {
462 // Some broken http servers just specify parameters
463 // like 'filename' without specifying disposition
464 // method. Rewind to the first non-white-space
465 // character.
467 if (!*str) str = start;
469 // RFC2231 - The legitimate parm format can be:
470 // A. title=ThisIsTitle
471 // B. title*=us-ascii'en-us'This%20is%20wierd.
472 // C. title*0*=us-ascii'en'This%20is%20wierd.%20We
473 // title*1*=have%20to%20support%20this.
474 // title*2="Else..."
475 // D. title*0="Hey, what you think you are doing?"
476 // title*1="There is no charset and lang info."
477 // RFC5987: only A and B
479 // collect results for the different algorithms (plain filename,
480 // RFC5987/2231-encoded filename, + continuations) separately and decide
481 // which to use at the end
482 char* caseAResult = nullptr;
483 char* caseBResult = nullptr;
484 char* caseCDResult = nullptr;
486 // collect continuation segments
487 nsTArray<Continuation> segments;
489 // our copies of the charset parameter, kept separately as they might
490 // differ for the two formats
491 nsDependentCSubstring charsetB, charsetCD;
493 nsDependentCSubstring lang;
495 int32_t paramLen = strlen(aParamName);
497 while (*str) {
498 // find name/value
500 const char* nameStart = str;
501 const char* nameEnd = nullptr;
502 const char* valueStart = nullptr;
503 const char* valueEnd = nullptr;
504 bool isQuotedString = false;
506 NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace.");
508 // Skip forward to the end of this token.
509 for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';';
510 str++) {
513 nameEnd = str;
515 int32_t nameLen = nameEnd - nameStart;
517 // Skip over whitespace, '=', and whitespace
518 while (nsCRT::IsAsciiSpace(*str)) ++str;
519 if (!*str) {
520 break;
522 if (*str != '=') {
523 // don't accept parameters without "="
524 goto increment_str;
526 // Skip over '=' only if it was actually there
527 str++;
528 while (nsCRT::IsAsciiSpace(*str)) ++str;
530 if (*str != '"') {
531 // The value is a token, not a quoted string.
532 valueStart = str;
533 for (valueEnd = str; *valueEnd && *valueEnd != ';'; valueEnd++) {
536 // ignore trailing whitespace:
537 while (valueEnd > valueStart && nsCRT::IsAsciiSpace(*(valueEnd - 1))) {
538 valueEnd--;
540 str = valueEnd;
541 } else {
542 isQuotedString = true;
544 ++str;
545 valueStart = str;
546 for (valueEnd = str; *valueEnd; ++valueEnd) {
547 if (*valueEnd == '\\' && *(valueEnd + 1)) {
548 ++valueEnd;
549 } else if (*valueEnd == '"') {
550 break;
553 str = valueEnd;
554 // *valueEnd != null means that *valueEnd is quote character.
555 if (*valueEnd) str++;
558 // See if this is the simplest case (case A above),
559 // a 'single' line value with no charset and lang.
560 // If so, copy it and return.
561 if (nameLen == paramLen &&
562 !nsCRT::strncasecmp(nameStart, aParamName, paramLen)) {
563 if (caseAResult) {
564 // we already have one caseA result, ignore subsequent ones
565 goto increment_str;
568 // if the parameter spans across multiple lines we have to strip out the
569 // line continuation -- jht 4/29/98
570 nsAutoCString tempStr(valueStart, valueEnd - valueStart);
571 tempStr.StripCRLF();
572 char* res = ToNewCString(tempStr, mozilla::fallible);
573 NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY);
575 if (isQuotedString) RemoveQuotedStringEscapes(res);
577 caseAResult = res;
578 // keep going, we may find a RFC 2231/5987 encoded alternative
580 // case B, C, and D
581 else if (nameLen > paramLen &&
582 !nsCRT::strncasecmp(nameStart, aParamName, paramLen) &&
583 *(nameStart + paramLen) == '*') {
584 // 1st char past '*'
585 const char* cp = nameStart + paramLen + 1;
587 // if param name ends in "*" we need do to RFC5987 "ext-value" decoding
588 bool needExtDecoding = *(nameEnd - 1) == '*';
590 bool caseB = nameLen == paramLen + 1;
591 bool caseCStart = (*cp == '0') && needExtDecoding;
593 // parse the segment number
594 int32_t segmentNumber = -1;
595 if (!caseB) {
596 int32_t segLen = (nameEnd - cp) - (needExtDecoding ? 1 : 0);
597 segmentNumber = parseSegmentNumber(cp, segLen);
599 if (segmentNumber == -1) {
600 acceptContinuations = false;
601 goto increment_str;
605 // CaseB and start of CaseC: requires charset and optional language
606 // in quotes (quotes required even if lang is blank)
607 if (caseB || (caseCStart && acceptContinuations)) {
608 // look for single quotation mark(')
609 const char* sQuote1 = strchr(valueStart, 0x27);
610 const char* sQuote2 = sQuote1 ? strchr(sQuote1 + 1, 0x27) : nullptr;
612 // Two single quotation marks must be present even in
613 // absence of charset and lang.
614 if (!sQuote1 || !sQuote2) {
615 NS_WARNING(
616 "Mandatory two single quotes are missing in header parameter\n");
619 const char* charsetStart = nullptr;
620 int32_t charsetLength = 0;
621 const char* langStart = nullptr;
622 int32_t langLength = 0;
623 const char* rawValStart = nullptr;
624 int32_t rawValLength = 0;
626 if (sQuote2 && sQuote1) {
627 // both delimiters present: charSet'lang'rawVal
628 rawValStart = sQuote2 + 1;
629 rawValLength = valueEnd - rawValStart;
631 langStart = sQuote1 + 1;
632 langLength = sQuote2 - langStart;
634 charsetStart = valueStart;
635 charsetLength = sQuote1 - charsetStart;
636 } else if (sQuote1) {
637 // one delimiter; assume charset'rawVal
638 rawValStart = sQuote1 + 1;
639 rawValLength = valueEnd - rawValStart;
641 charsetStart = valueStart;
642 charsetLength = sQuote1 - valueStart;
643 } else {
644 // no delimiter: just rawVal
645 rawValStart = valueStart;
646 rawValLength = valueEnd - valueStart;
649 if (langLength != 0) {
650 lang.Assign(langStart, langLength);
653 // keep the charset for later
654 if (caseB) {
655 charsetB.Assign(charsetStart, charsetLength);
656 } else {
657 // if caseCorD
658 charsetCD.Assign(charsetStart, charsetLength);
661 // non-empty value part
662 if (rawValLength > 0) {
663 if (!caseBResult && caseB) {
664 if (!IsValidPercentEscaped(rawValStart, rawValLength)) {
665 goto increment_str;
668 // allocate buffer for the raw value
669 char* tmpResult = (char*)moz_xmemdup(rawValStart, rawValLength + 1);
670 *(tmpResult + rawValLength) = 0;
672 nsUnescape(tmpResult);
673 caseBResult = tmpResult;
674 } else {
675 // caseC
676 bool added = addContinuation(segments, 0, rawValStart, rawValLength,
677 needExtDecoding, isQuotedString);
679 if (!added) {
680 // continuation not added, stop processing them
681 acceptContinuations = false;
685 } // end of if-block : title*0*= or title*=
686 // caseD: a line of multiline param with no need for unescaping :
687 // title*[0-9]= or 2nd or later lines of a caseC param : title*[1-9]*=
688 else if (acceptContinuations && segmentNumber != -1) {
689 uint32_t valueLength = valueEnd - valueStart;
691 bool added =
692 addContinuation(segments, segmentNumber, valueStart, valueLength,
693 needExtDecoding, isQuotedString);
695 if (!added) {
696 // continuation not added, stop processing them
697 acceptContinuations = false;
699 } // end of if-block : title*[0-9]= or title*[1-9]*=
702 // str now points after the end of the value.
703 // skip over whitespace, ';', whitespace.
704 increment_str:
705 while (nsCRT::IsAsciiSpace(*str)) ++str;
706 if (*str == ';') {
707 ++str;
708 } else {
709 // stop processing the header field; either we are done or the
710 // separator was missing
711 break;
713 while (nsCRT::IsAsciiSpace(*str)) ++str;
716 caseCDResult = combineContinuations(segments);
718 if (caseBResult && !charsetB.IsEmpty()) {
719 // check that the 2231/5987 result decodes properly given the
720 // specified character set
721 if (!IsValidOctetSequenceForCharset(charsetB, caseBResult)) {
722 caseBResult = nullptr;
726 if (caseCDResult && !charsetCD.IsEmpty()) {
727 // check that the 2231/5987 result decodes properly given the
728 // specified character set
729 if (!IsValidOctetSequenceForCharset(charsetCD, caseCDResult)) {
730 caseCDResult = nullptr;
734 if (caseBResult) {
735 // prefer simple 5987 format over 2231 with continuations
736 *aResult = caseBResult;
737 caseBResult = nullptr;
738 charset.Assign(charsetB);
739 } else if (caseCDResult) {
740 // prefer 2231/5987 with or without continuations over plain format
741 *aResult = caseCDResult;
742 caseCDResult = nullptr;
743 charset.Assign(charsetCD);
744 } else if (caseAResult) {
745 *aResult = caseAResult;
746 caseAResult = nullptr;
749 // free unused stuff
750 free(caseAResult);
751 free(caseBResult);
752 free(caseCDResult);
754 // if we have a result
755 if (*aResult) {
756 // then return charset and lang as well
757 if (aLang && !lang.IsEmpty()) {
758 uint32_t len = lang.Length();
759 *aLang = (char*)moz_xmemdup(lang.BeginReading(), len + 1);
760 *(*aLang + len) = 0;
762 if (aCharset && !charset.IsEmpty()) {
763 uint32_t len = charset.Length();
764 *aCharset = (char*)moz_xmemdup(charset.BeginReading(), len + 1);
765 *(*aCharset + len) = 0;
769 return *aResult ? NS_OK : NS_ERROR_INVALID_ARG;
772 nsresult internalDecodeRFC2047Header(const char* aHeaderVal,
773 const nsACString& aDefaultCharset,
774 bool aOverrideCharset,
775 bool aEatContinuations,
776 nsACString& aResult) {
777 aResult.Truncate();
778 if (!aHeaderVal) return NS_ERROR_INVALID_ARG;
779 if (!*aHeaderVal) return NS_OK;
781 // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string but
782 // aDefaultCharset is specified, decodes RFC 2047 encoding and converts
783 // to UTF-8. Otherwise, just strips away CRLF.
784 if (strstr(aHeaderVal, "=?") ||
785 (!aDefaultCharset.IsEmpty() &&
786 (!IsUtf8(nsDependentCString(aHeaderVal)) ||
787 Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal))))) {
788 DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
789 } else if (aEatContinuations &&
790 (strchr(aHeaderVal, '\n') || strchr(aHeaderVal, '\r'))) {
791 aResult = aHeaderVal;
792 } else {
793 aEatContinuations = false;
794 aResult = aHeaderVal;
797 if (aEatContinuations) {
798 nsAutoCString temp(aResult);
799 temp.ReplaceSubstring("\n\t", " ");
800 temp.ReplaceSubstring("\r\t", " ");
801 temp.StripCRLF();
802 aResult = temp;
805 return NS_OK;
808 NS_IMETHODIMP
809 nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal,
810 const char* aDefaultCharset,
811 bool aOverrideCharset,
812 bool aEatContinuations,
813 nsACString& aResult) {
814 return internalDecodeRFC2047Header(aHeaderVal, nsCString(aDefaultCharset),
815 aOverrideCharset, aEatContinuations,
816 aResult);
// true if the character may appear unescaped in an RFC 5987 value
// (see RFC 5987, Section 3.2.1, "attr-char"): an ASCII letter, a digit, or
// one of  ! # $ & + - . ^ _ ` | ~
bool IsRFC5987AttrChar(char aChar) {
  const bool isLetter =
      (aChar >= 'a' && aChar <= 'z') || (aChar >= 'A' && aChar <= 'Z');
  const bool isDigit = aChar >= '0' && aChar <= '9';
  if (isLetter || isDigit) {
    return true;
  }

  switch (aChar) {
    case '!':
    case '#':
    case '$':
    case '&':
    case '+':
    case '-':
    case '.':
    case '^':
    case '_':
    case '`':
    case '|':
    case '~':
      return true;
    default:
      return false;
  }
}
831 // percent-decode a value
832 // returns false on failure
833 bool PercentDecode(nsACString& aValue) {
834 char* c = (char*)moz_xmalloc(aValue.Length() + 1);
836 strcpy(c, PromiseFlatCString(aValue).get());
837 nsUnescape(c);
838 aValue.Assign(c);
839 free(c);
841 return true;
844 // Decode a parameter value using the encoding defined in RFC 5987
846 // charset "'" [ language ] "'" value-chars
847 NS_IMETHODIMP
848 nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString& aParamVal,
849 nsACString& aLang,
850 nsAString& aResult) {
851 nsAutoCString charset;
852 nsAutoCString language;
853 nsAutoCString value;
855 uint32_t delimiters = 0;
856 const nsCString& encoded = PromiseFlatCString(aParamVal);
857 const char* c = encoded.get();
859 while (*c) {
860 char tc = *c++;
862 if (tc == '\'') {
863 // single quote
864 delimiters++;
865 } else if (((unsigned char)tc) >= 128) {
866 // fail early, not ASCII
867 NS_WARNING("non-US-ASCII character in RFC5987-encoded param");
868 return NS_ERROR_INVALID_ARG;
869 } else {
870 if (delimiters == 0) {
871 // valid characters are checked later implicitly
872 charset.Append(tc);
873 } else if (delimiters == 1) {
874 // no value checking for now
875 language.Append(tc);
876 } else if (delimiters == 2) {
877 if (IsRFC5987AttrChar(tc)) {
878 value.Append(tc);
879 } else if (tc == '%') {
880 if (!IsHexDigit(c[0]) || !IsHexDigit(c[1])) {
881 // we expect two more characters
882 NS_WARNING("broken %-escape in RFC5987-encoded param");
883 return NS_ERROR_INVALID_ARG;
885 value.Append(tc);
886 // we consume two more
887 value.Append(*c++);
888 value.Append(*c++);
889 } else {
890 // character not allowed here
891 NS_WARNING("invalid character in RFC5987-encoded param");
892 return NS_ERROR_INVALID_ARG;
898 if (delimiters != 2) {
899 NS_WARNING("missing delimiters in RFC5987-encoded param");
900 return NS_ERROR_INVALID_ARG;
903 // abort early for unsupported encodings
904 if (!charset.LowerCaseEqualsLiteral("utf-8")) {
905 NS_WARNING("unsupported charset in RFC5987-encoded param");
906 return NS_ERROR_INVALID_ARG;
909 // percent-decode
910 if (!PercentDecode(value)) {
911 return NS_ERROR_OUT_OF_MEMORY;
914 // return the encoding
915 aLang.Assign(language);
917 // finally convert octet sequence to UTF-8 and be done
918 nsAutoCString utf8;
919 nsresult rv = ConvertStringToUTF8(value, charset, true, false, utf8);
920 NS_ENSURE_SUCCESS(rv, rv);
922 CopyUTF8toUTF16(utf8, aResult);
923 return NS_OK;
926 nsresult internalDecodeParameter(const nsACString& aParamValue,
927 const nsACString& aCharset,
928 const nsACString& aDefaultCharset,
929 bool aOverrideCharset, bool aDecode2047,
930 nsACString& aResult) {
931 aResult.Truncate();
932 // If aCharset is given, aParamValue was obtained from RFC2231/5987
933 // encoding and we're pretty sure that it's in aCharset.
934 if (!aCharset.IsEmpty()) {
935 return ConvertStringToUTF8(aParamValue, aCharset, true, true, aResult);
938 const nsCString& param = PromiseFlatCString(aParamValue);
939 nsAutoCString unQuoted;
940 nsACString::const_iterator s, e;
941 param.BeginReading(s);
942 param.EndReading(e);
944 // strip '\' when used to quote CR, LF, '"' and '\'
945 for (; s != e; ++s) {
946 if ((*s == '\\')) {
947 if (++s == e) {
948 --s; // '\' is at the end. move back and append '\'.
949 } else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' &&
950 *s != '\\') {
951 --s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\'
953 // else : skip '\' and append the quoted character.
955 unQuoted.Append(*s);
958 aResult = unQuoted;
959 nsresult rv = NS_OK;
961 if (aDecode2047) {
962 nsAutoCString decoded;
964 // Try RFC 2047 encoding, instead.
965 rv = internalDecodeRFC2047Header(unQuoted.get(), aDefaultCharset,
966 aOverrideCharset, true, decoded);
968 if (NS_SUCCEEDED(rv) && !decoded.IsEmpty()) aResult = decoded;
971 return rv;
974 NS_IMETHODIMP
975 nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
976 const char* aCharset,
977 const char* aDefaultCharset,
978 bool aOverrideCharset,
979 nsACString& aResult) {
980 return internalDecodeParameter(aParamValue, nsCString(aCharset),
981 nsCString(aDefaultCharset), aOverrideCharset,
982 true, aResult);
// True if |c| is an ASCII hexadecimal digit (0-9, A-F, a-f).
#define ISHEXCHAR(c)                             \
  ((0x30 <= uint8_t(c) && uint8_t(c) <= 0x39) || \
   (0x41 <= uint8_t(c) && uint8_t(c) <= 0x46) || \
   (0x61 <= uint8_t(c) && uint8_t(c) <= 0x66))

// Decode Q encoding (RFC 2047).
//
// Decodes |length| characters starting at |in|: "=hh" becomes the octet with
// hex value hh, '_' becomes a space, everything else is copied. Tabs in the
// decoded text (up to its first null) are then replaced by spaces.
// Returns a null-terminated buffer that the caller releases with free(), or
// nullptr on allocation failure, on a malformed "=hh" sequence, or when the
// input contains a character with the high bit set.
// static
char* DecodeQ(const char* in, uint32_t length) {
  // Value of a character already known to satisfy ISHEXCHAR.
  const auto hexValue = [](char aDigit) -> unsigned {
    const unsigned d = uint8_t(aDigit);
    if (d <= 0x39) {
      return d - 0x30;
    }
    // fold 'A'-'F' onto 'a'-'f'
    return (d | 0x20) - 0x61 + 10;
  };

  char* dest = (char*)calloc(length + 1, sizeof(char));
  if (dest == nullptr) return nullptr;

  char* out = dest;
  while (length > 0) {
    switch (*in) {
      case '=':
        // check if |in| in the form of '=hh'  where h is [0-9a-fA-F].
        if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2])) {
          free(dest);
          return nullptr;
        }
        // both digits were validated above, so combine them directly
        *out++ = (char)((hexValue(in[1]) << 4) | hexValue(in[2]));
        in += 3;
        length -= 3;
        break;

      case '_':
        *out++ = ' ';
        in++;
        length--;
        break;

      default:
        if (*in & 0x80) {
          free(dest);
          return nullptr;
        }
        *out++ = *in++;
        length--;
    }
  }
  *out++ = '\0';

  for (out = dest; *out; ++out) {
    if (*out == '\t') *out = ' ';
  }

  return dest;
}
// check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842))
// or has ESC which may be an indication that it's in one of many ISO
// 2022 7bit encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554).
//
// Only the first |len| characters of |input| are examined. Returns false as
// soon as a character with the high bit set is seen, true as soon as ESC is
// seen, and otherwise true only if the scan ended in the "HZ seen" state.
// static
bool Is7bitNonAsciiString(const char* input, uint32_t len) {
  enum {
    hz_initial,    // No HZ seen yet
    hz_escaped,    // Inside an HZ ~{ escape sequence
    hz_seen,       // Have seen at least one complete HZ sequence
    hz_notpresent  // Have seen something that is not legal HZ
  } hz_state = hz_initial;

  while (len) {
    const int32_t c = uint8_t(*input++);
    len--;
    if (c & 0x80) return false;
    if (c == 0x1B) return true;
    if (c != '~') continue;

    // Character following the '~'. When the '~' was the last character in
    // range there is none; use '\0' instead of reading past |len|, which
    // also keeps |len| from wrapping around in the "~~" case below.
    const char next = len ? *input : '\0';

    switch (hz_state) {
      case hz_initial:
      case hz_seen:
        if (next == '{') {
          hz_state = hz_escaped;
        } else if (next == '~') {
          // ~~ is the HZ encoding of ~.  Skip over second ~ as well
          hz_state = hz_seen;
          input++;
          len--;
        } else {
          hz_state = hz_notpresent;
        }
        break;

      case hz_escaped:
        if (next == '}') hz_state = hz_seen;
        break;
      default:
        break;
    }
  }
  return hz_state == hz_seen;
}
1083 #define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD)
1085 // copy 'raw' sequences of octets in aInput to aOutput.
1086 // If aDefaultCharset is specified, the input is assumed to be in the
1087 // charset and converted to UTF-8. Otherwise, a blind copy is made.
1088 // If aDefaultCharset is specified, but the conversion to UTF-8
1089 // is not successful, each octet is replaced by Unicode replacement
1090 // chars. *aOutput is advanced by the number of output octets.
1091 // static
1092 void CopyRawHeader(const char* aInput, uint32_t aLen,
1093 const nsACString& aDefaultCharset, nsACString& aOutput) {
1094 int32_t c;
1096 // If aDefaultCharset is not specified, make a blind copy.
1097 if (aDefaultCharset.IsEmpty()) {
1098 aOutput.Append(aInput, aLen);
1099 return;
1102 // Copy as long as it's US-ASCII. An ESC may indicate ISO 2022
1103 // A ~ may indicate it is HZ
1104 while (aLen && (c = uint8_t(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) {
1105 aOutput.Append(char(c));
1106 aLen--;
1108 if (!aLen) {
1109 return;
1111 aInput--;
1113 // skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii
1114 // string and aDefaultCharset is a 7bit non-ascii charset.
1115 bool skipCheck =
1116 (c == 0x1B || c == '~') &&
1117 IS_7BIT_NON_ASCII_CHARSET(PromiseFlatCString(aDefaultCharset).get());
1119 // If not UTF-8, treat as default charset
1120 nsAutoCString utf8Text;
1121 if (NS_SUCCEEDED(ConvertStringToUTF8(Substring(aInput, aInput + aLen),
1122 PromiseFlatCString(aDefaultCharset),
1123 skipCheck, true, utf8Text))) {
1124 aOutput.Append(utf8Text);
1125 } else { // replace each octet with Unicode replacement char in UTF-8.
1126 for (uint32_t i = 0; i < aLen; i++) {
1127 c = uint8_t(*aInput++);
1128 if (c & 0x80) {
1129 aOutput.Append(REPLACEMENT_CHAR);
1130 } else {
1131 aOutput.Append(char(c));
1137 nsresult DecodeQOrBase64Str(const char* aEncoded, size_t aLen, char aQOrBase64,
1138 const nsACString& aCharset, nsACString& aResult) {
1139 char* decodedText;
1140 bool b64alloc = false;
1141 NS_ASSERTION(aQOrBase64 == 'Q' || aQOrBase64 == 'B', "Should be 'Q' or 'B'");
1142 if (aQOrBase64 == 'Q') {
1143 decodedText = DecodeQ(aEncoded, aLen);
1144 } else if (aQOrBase64 == 'B') {
1145 decodedText = PL_Base64Decode(aEncoded, aLen, nullptr);
1146 b64alloc = true;
1147 } else {
1148 return NS_ERROR_INVALID_ARG;
1151 if (!decodedText) {
1152 return NS_ERROR_INVALID_ARG;
1155 nsAutoCString utf8Text;
1156 // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
1157 nsresult rv = ConvertStringToUTF8(
1158 nsDependentCString(decodedText), aCharset,
1159 IS_7BIT_NON_ASCII_CHARSET(PromiseFlatCString(aCharset).get()), true,
1160 utf8Text);
1161 if (b64alloc) {
1162 PR_Free(decodedText);
1163 } else {
1164 free(decodedText);
1166 if (NS_FAILED(rv)) {
1167 return rv;
1169 aResult.Append(utf8Text);
1171 return NS_OK;
1174 static const char especials[] = R"(()<>@,;:\"/[]?.=)";
1176 // |decode_mime_part2_str| taken from comi18n.c
1177 // Decode RFC2047-encoded words in the input and convert the result to UTF-8.
1178 // If aOverrideCharset is true, charset in RFC2047-encoded words is
1179 // ignored and aDefaultCharset is assumed, instead. aDefaultCharset
1180 // is also used to convert raw octets (without RFC 2047 encoding) to UTF-8.
1181 // static
1182 nsresult DecodeRFC2047Str(const char* aHeader,
1183 const nsACString& aDefaultCharset,
1184 bool aOverrideCharset, nsACString& aResult) {
1185 const char *p, *q = nullptr, *r;
1186 const char* begin; // tracking pointer for where we are in the input buffer
1187 int32_t isLastEncodedWord = 0;
1188 const char *charsetStart, *charsetEnd;
1189 nsAutoCString prevCharset, curCharset;
1190 nsAutoCString encodedText;
1191 char prevEncoding = '\0', curEncoding;
1192 nsresult rv;
1194 begin = aHeader;
1196 // To avoid buffer realloc, if possible, set capacity in advance. No
1197 // matter what, more than 3x expansion can never happen for all charsets
1198 // supported by Mozilla. SCSU/BCSU with the sliding window set to a
1199 // non-BMP block may be exceptions, but Mozilla does not support them.
1200 // Neither any known mail/news program use them. Even if there's, we're
1201 // safe because we don't use a raw *char any more.
1202 aResult.SetCapacity(3 * strlen(aHeader));
1204 while ((p = strstr(begin, "=?")) != nullptr) {
1205 if (isLastEncodedWord) {
1206 // See if it's all whitespace.
1207 for (q = begin; q < p; ++q) {
1208 if (!strchr(" \t\r\n", *q)) {
1209 break;
1214 if (!isLastEncodedWord || q < p) {
1215 if (!encodedText.IsEmpty()) {
1216 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
1217 prevEncoding, prevCharset, aResult);
1218 if (NS_FAILED(rv)) {
1219 aResult.Append(encodedText);
1221 encodedText.Truncate();
1222 prevCharset.Truncate();
1223 prevEncoding = '\0';
1225 // copy the part before the encoded-word
1226 CopyRawHeader(begin, p - begin, aDefaultCharset, aResult);
1227 begin = p;
1230 p += 2;
1232 // Get charset info
1233 charsetStart = p;
1234 charsetEnd = nullptr;
1235 for (q = p; *q != '?'; q++) {
1236 if (*q <= ' ' || strchr(especials, *q)) {
1237 goto badsyntax;
1240 // RFC 2231 section 5
1241 if (!charsetEnd && *q == '*') {
1242 charsetEnd = q;
1245 if (!charsetEnd) {
1246 charsetEnd = q;
1249 q++;
1250 curEncoding = nsCRT::ToUpper(*q);
1251 if (curEncoding != 'Q' && curEncoding != 'B') goto badsyntax;
1253 if (q[1] != '?') goto badsyntax;
1255 // loop-wise, keep going until we hit "?=". the inner check handles the
1256 // nul terminator should the string terminate before we hit the right
1257 // marker. (And the r[1] will never reach beyond the end of the string
1258 // because *r != '?' is true if r is the nul character.)
1259 for (r = q + 2; *r != '?' || r[1] != '='; r++) {
1260 if (*r < ' ') goto badsyntax;
1262 if (r == q + 2) {
1263 // it's empty, skip
1264 begin = r + 2;
1265 isLastEncodedWord = 1;
1266 continue;
1269 curCharset.Assign(charsetStart, charsetEnd - charsetStart);
1270 // Override charset if requested. Never override labeled UTF-8.
1271 // Use default charset instead of UNKNOWN-8BIT
1272 if ((aOverrideCharset &&
1273 0 != nsCRT::strcasecmp(curCharset.get(), "UTF-8")) ||
1274 (!aDefaultCharset.IsEmpty() &&
1275 0 == nsCRT::strcasecmp(curCharset.get(), "UNKNOWN-8BIT"))) {
1276 curCharset = aDefaultCharset;
1279 const char* R;
1280 R = r;
1281 if (curEncoding == 'B') {
1282 // bug 227290. ignore an extraneous '=' at the end.
1283 // (# of characters in B-encoded part has to be a multiple of 4)
1284 int32_t n = r - (q + 2);
1285 R -= (n % 4 == 1 && !strncmp(r - 3, "===", 3)) ? 1 : 0;
1287 // Bug 493544. Don't decode the encoded text until it ends
1288 if (R[-1] != '=' &&
1289 (prevCharset.IsEmpty() ||
1290 (curCharset == prevCharset && curEncoding == prevEncoding))) {
1291 encodedText.Append(q + 2, R - (q + 2));
1292 prevCharset = curCharset;
1293 prevEncoding = curEncoding;
1295 begin = r + 2;
1296 isLastEncodedWord = 1;
1297 continue;
1300 bool bDecoded; // If the current line has been decoded.
1301 bDecoded = false;
1302 if (!encodedText.IsEmpty()) {
1303 if (curCharset == prevCharset && curEncoding == prevEncoding) {
1304 encodedText.Append(q + 2, R - (q + 2));
1305 bDecoded = true;
1307 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
1308 prevEncoding, prevCharset, aResult);
1309 if (NS_FAILED(rv)) {
1310 aResult.Append(encodedText);
1312 encodedText.Truncate();
1313 prevCharset.Truncate();
1314 prevEncoding = '\0';
1316 if (!bDecoded) {
1317 rv = DecodeQOrBase64Str(q + 2, R - (q + 2), curEncoding, curCharset,
1318 aResult);
1319 if (NS_FAILED(rv)) {
1320 aResult.Append(encodedText);
1324 begin = r + 2;
1325 isLastEncodedWord = 1;
1326 continue;
1328 badsyntax:
1329 if (!encodedText.IsEmpty()) {
1330 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
1331 prevEncoding, prevCharset, aResult);
1332 if (NS_FAILED(rv)) {
1333 aResult.Append(encodedText);
1335 encodedText.Truncate();
1336 prevCharset.Truncate();
1338 // copy the part before the encoded-word
1339 aResult.Append(begin, p - begin);
1340 begin = p;
1341 isLastEncodedWord = 0;
1344 if (!encodedText.IsEmpty()) {
1345 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
1346 prevEncoding, prevCharset, aResult);
1347 if (NS_FAILED(rv)) {
1348 aResult.Append(encodedText);
1352 // put the tail back
1353 CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult);
1355 nsAutoCString tempStr(aResult);
1356 tempStr.ReplaceChar('\t', ' ');
1357 aResult = tempStr;
1359 return NS_OK;