1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set sw=2 ts=8 et tw=80 : */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
14 #include "nsMIMEHeaderParamImpl.h"
15 #include "nsNativeCharsetUtils.h"
16 #include "mozilla/Encoding.h"
17 #include "mozilla/TextUtils.h"
18 #include "mozilla/Utf8.h"
20 using mozilla::Encoding
;
21 using mozilla::IsAscii
;
22 using mozilla::IsUtf8
;
24 // static functions declared below are moved from mailnews/mime/src/comi18n.cpp
26 static char* DecodeQ(const char*, uint32_t);
27 static bool Is7bitNonAsciiString(const char*, uint32_t);
28 static void CopyRawHeader(const char*, uint32_t, const nsACString
&,
30 static nsresult
DecodeRFC2047Str(const char*, const nsACString
&, bool,
32 static nsresult
internalDecodeParameter(const nsACString
&, const nsACString
&,
33 const nsACString
&, bool, bool,
36 static nsresult
ToUTF8(const nsACString
& aString
, const nsACString
& aCharset
,
37 bool aAllowSubstitution
, nsACString
& aResult
) {
38 if (aCharset
.IsEmpty()) {
39 return NS_ERROR_INVALID_ARG
;
42 const auto* encoding
= Encoding::ForLabelNoReplacement(aCharset
);
44 return NS_ERROR_UCONV_NOCONV
;
46 if (aAllowSubstitution
) {
47 nsresult rv
= encoding
->DecodeWithoutBOMHandling(aString
, aResult
);
48 if (NS_SUCCEEDED(rv
)) {
53 return encoding
->DecodeWithoutBOMHandlingAndWithoutReplacement(aString
,
57 static nsresult
ConvertStringToUTF8(const nsACString
& aString
,
58 const nsACString
& aCharset
, bool aSkipCheck
,
59 bool aAllowSubstitution
,
60 nsACString
& aUTF8String
) {
61 // return if ASCII only or valid UTF-8 providing that the ASCII/UTF-8
62 // check is requested. It may not be asked for if a caller suspects
63 // that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or
64 // it's in a charset other than UTF-8 that can be mistaken for UTF-8.
65 if (!aSkipCheck
&& (IsAscii(aString
) || IsUtf8(aString
))) {
66 aUTF8String
= aString
;
70 aUTF8String
.Truncate();
72 nsresult rv
= ToUTF8(aString
, aCharset
, aAllowSubstitution
, aUTF8String
);
74 // additional protection for cases where check is skipped and the input
75 // is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch
76 // was wrong.) We don't check ASCIIness assuming there's no charset
77 // incompatible with ASCII (we don't support EBCDIC).
78 if (aSkipCheck
&& NS_FAILED(rv
) && IsUtf8(aString
)) {
79 aUTF8String
= aString
;
86 // XXX The chance of UTF-7 being used in the message header is really
87 // low, but in theory it's possible.
88 #define IS_7BIT_NON_ASCII_CHARSET(cset) \
89 (!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \
90 !nsCRT::strncasecmp((cset), "HZ-GB", 5) || \
91 !nsCRT::strncasecmp((cset), "UTF-7", 5))
93 NS_IMPL_ISUPPORTS(nsMIMEHeaderParamImpl
, nsIMIMEHeaderParam
)
96 nsMIMEHeaderParamImpl::GetParameter(const nsACString
& aHeaderVal
,
97 const char* aParamName
,
98 const nsACString
& aFallbackCharset
,
99 bool aTryLocaleCharset
, char** aLang
,
100 nsAString
& aResult
) {
101 return DoGetParameter(aHeaderVal
, aParamName
, MIME_FIELD_ENCODING
,
102 aFallbackCharset
, aTryLocaleCharset
, aLang
, aResult
);
106 nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString
& aHeaderVal
,
107 const char* aParamName
,
108 const nsACString
& aFallbackCharset
,
109 bool aTryLocaleCharset
, char** aLang
,
110 nsAString
& aResult
) {
111 return DoGetParameter(aHeaderVal
, aParamName
, HTTP_FIELD_ENCODING
,
112 aFallbackCharset
, aTryLocaleCharset
, aLang
, aResult
);
116 nsresult
nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString
& aHeaderVal
,
117 const char* aParamName
,
118 nsAString
& aResult
) {
119 return DoGetParameter(aHeaderVal
, aParamName
, HTTP_FIELD_ENCODING
, ""_ns
,
120 false, nullptr, aResult
);
124 // detects any non-null characters past a null
125 bool nsMIMEHeaderParamImpl::ContainsTrailingCharPastNull(
126 const nsACString
& aVal
) {
127 nsACString::const_iterator first
;
128 aVal
.BeginReading(first
);
129 nsACString::const_iterator end
;
130 aVal
.EndReading(end
);
132 if (FindCharInReadable(L
'\0', first
, end
)) {
133 while (first
!= end
) {
134 if (*first
!= '\0') {
135 // contains trailing characters past the null character
144 // XXX : aTryLocaleCharset is not yet effective.
146 nsresult
nsMIMEHeaderParamImpl::DoGetParameter(
147 const nsACString
& aHeaderVal
, const char* aParamName
,
148 ParamDecoding aDecoding
, const nsACString
& aFallbackCharset
,
149 bool aTryLocaleCharset
, char** aLang
, nsAString
& aResult
) {
153 // get parameter (decode RFC 2231/5987 when applicable, as specified by
154 // aDecoding (5987 being a subset of 2231) and return charset.)
157 rv
= DoParameterInternal(aHeaderVal
, aParamName
, aDecoding
,
158 getter_Copies(charset
), aLang
, getter_Copies(med
));
159 if (NS_FAILED(rv
)) return rv
;
161 // convert to UTF-8 after charset conversion and RFC 2047 decoding
165 rv
= internalDecodeParameter(med
, charset
, ""_ns
, false,
166 // was aDecoding == MIME_FIELD_ENCODING
169 NS_ENSURE_SUCCESS(rv
, rv
);
171 if (!aFallbackCharset
.IsEmpty()) {
172 const Encoding
* encoding
= Encoding::ForLabel(aFallbackCharset
);
174 if (NS_SUCCEEDED(ConvertStringToUTF8(str1
, aFallbackCharset
, false,
175 encoding
!= UTF_8_ENCODING
, str2
))) {
176 CopyUTF8toUTF16(str2
, aResult
);
182 CopyUTF8toUTF16(str1
, aResult
);
186 if (aTryLocaleCharset
&& !NS_IsNativeUTF8()) {
187 return NS_CopyNativeToUnicode(str1
, aResult
);
190 CopyASCIItoUTF16(str1
, aResult
);
// Remove backslash-encoded sequences (quoted-pairs) from a quoted-string.
//
// The NUL-terminated string |src| is rewritten in place: each backslash that
// is followed by another character is dropped and the following character is
// kept verbatim. A lone backslash at the very end of the string is preserved.
// The result is never longer than the input and is re-terminated.
void RemoveQuotedStringEscapes(char* src) {
  char* dst = src;

  for (char* c = src; *c; ++c) {
    if (c[0] == '\\' && c[1]) {
      // skip backslash if not at end
      ++c;
    }
    *dst++ = *c;
  }
  *dst = 0;
}
// Returns true if aChar is an ASCII hexadecimal digit (0-9, a-f, A-F).
bool IsHexDigit(char aChar) {
  // Work on the unsigned value so the range checks behave the same whether
  // plain char is signed or unsigned on the target platform.
  const unsigned char c = static_cast<unsigned char>(aChar);

  return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') ||
         (c >= '0' && c <= '9');
}
217 // validate that a C String containing %-escapes is syntactically valid
218 bool IsValidPercentEscaped(const char* aValue
, int32_t len
) {
219 for (int32_t i
= 0; i
< len
; i
++) {
220 if (aValue
[i
] == '%') {
221 if (!IsHexDigit(aValue
[i
+ 1]) || !IsHexDigit(aValue
[i
+ 2])) {
229 // Support for continuations (RFC 2231, Section 3)
231 // only a sane number supported
232 #define MAX_CONTINUATIONS 999
234 // part of a continuation
238 Continuation(const char* aValue
, uint32_t aLength
, bool aNeedsPercentDecoding
,
239 bool aWasQuotedString
) {
242 needsPercentDecoding
= aNeedsPercentDecoding
;
243 wasQuotedString
= aWasQuotedString
;
246 // empty constructor needed for nsTArray
249 needsPercentDecoding
= false;
250 wasQuotedString
= false;
252 ~Continuation() = default;
256 bool needsPercentDecoding
;
257 bool wasQuotedString
;
260 // combine segments into a single string, returning the allocated string
261 // (or nullptr) while emptying the list
262 char* combineContinuations(nsTArray
<Continuation
>& aArray
) {
264 if (aArray
.Length() == 0) return nullptr;
266 // Get an upper bound for the length
268 for (uint32_t i
= 0; i
< aArray
.Length(); i
++) {
269 length
+= aArray
[i
].length
;
273 char* result
= (char*)moz_xmalloc(length
+ 1);
278 for (uint32_t i
= 0; i
< aArray
.Length(); i
++) {
279 Continuation cont
= aArray
[i
];
280 if (!cont
.value
) break;
282 char* c
= result
+ strlen(result
);
283 strncat(result
, cont
.value
, cont
.length
);
284 if (cont
.needsPercentDecoding
) {
287 if (cont
.wasQuotedString
) {
288 RemoveQuotedStringEscapes(c
);
292 // return null if empty value
293 if (*result
== '\0') {
301 // add a continuation, return false on error if segment already has been seen
302 bool addContinuation(nsTArray
<Continuation
>& aArray
, uint32_t aIndex
,
303 const char* aValue
, uint32_t aLength
,
304 bool aNeedsPercentDecoding
, bool aWasQuotedString
) {
305 if (aIndex
< aArray
.Length() && aArray
[aIndex
].value
) {
306 NS_WARNING("duplicate RC2231 continuation segment #\n");
310 if (aIndex
> MAX_CONTINUATIONS
) {
311 NS_WARNING("RC2231 continuation segment # exceeds limit\n");
315 if (aNeedsPercentDecoding
&& aWasQuotedString
) {
317 "RC2231 continuation segment can't use percent encoding and quoted "
318 "string form at the same time\n");
322 Continuation
cont(aValue
, aLength
, aNeedsPercentDecoding
, aWasQuotedString
);
324 if (aArray
.Length() <= aIndex
) {
325 aArray
.SetLength(aIndex
+ 1);
327 aArray
[aIndex
] = cont
;
332 // parse a segment number; return -1 on error
333 int32_t parseSegmentNumber(const char* aValue
, int32_t aLen
) {
335 NS_WARNING("segment number missing\n");
339 if (aLen
> 1 && aValue
[0] == '0') {
340 NS_WARNING("leading '0' not allowed in segment number\n");
344 int32_t segmentNumber
= 0;
346 for (int32_t i
= 0; i
< aLen
; i
++) {
347 if (!(aValue
[i
] >= '0' && aValue
[i
] <= '9')) {
348 NS_WARNING("invalid characters in segment number\n");
353 segmentNumber
+= aValue
[i
] - '0';
354 if (segmentNumber
> MAX_CONTINUATIONS
) {
355 NS_WARNING("Segment number exceeds sane size\n");
360 return segmentNumber
;
363 // validate a given octet sequence for compliance with the specified
365 bool IsValidOctetSequenceForCharset(const nsACString
& aCharset
,
366 const char* aOctets
) {
367 nsAutoCString tmpRaw
;
368 tmpRaw
.Assign(aOctets
);
369 nsAutoCString tmpDecoded
;
371 nsresult rv
= ConvertStringToUTF8(tmpRaw
, aCharset
, false, false, tmpDecoded
);
374 // we can't decode; charset may be unsupported, or the octet sequence
375 // is broken (illegal or incomplete octet sequence contained)
377 "RFC2231/5987 parameter value does not decode according to specified "
385 // moved almost verbatim from mimehdrs.cpp
387 // MimeHeaders_get_parameter (const char *header_value, const char *parm_name,
388 // char **charset, char **language)
390 // The format of these header lines is
391 // <token> [ ';' <token> '=' <token-or-quoted-string> ]*
393 nsMIMEHeaderParamImpl::GetParameterInternal(const nsACString
& aHeaderValue
,
394 const char* aParamName
,
395 char** aCharset
, char** aLang
,
397 return DoParameterInternal(aHeaderValue
, aParamName
, MIME_FIELD_ENCODING
,
398 aCharset
, aLang
, aResult
);
402 nsresult
nsMIMEHeaderParamImpl::DoParameterInternal(
403 const nsACString
& aHeaderValue
, const char* aParamName
,
404 ParamDecoding aDecoding
, char** aCharset
, char** aLang
, char** aResult
) {
405 if (aHeaderValue
.IsEmpty() || !aResult
) {
406 return NS_ERROR_INVALID_ARG
;
409 if (ContainsTrailingCharPastNull(aHeaderValue
)) {
411 return NS_ERROR_INVALID_ARG
;
414 const nsCString
& flat
= PromiseFlatCString(aHeaderValue
);
415 const char* str
= flat
.get();
418 return NS_ERROR_INVALID_ARG
;
423 if (aCharset
) *aCharset
= nullptr;
424 if (aLang
) *aLang
= nullptr;
426 nsAutoCString charset
;
428 // change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable
429 // them for HTTP header fields later on, see bug 776324
430 bool acceptContinuations
= true;
432 // skip leading white space.
433 for (; *str
&& nsCRT::IsAsciiSpace(*str
); ++str
) {
436 const char* start
= str
;
438 // aParamName is empty. return the first (possibly) _unnamed_ 'parameter'
439 // For instance, return 'inline' in the following case:
440 // Content-Disposition: inline; filename=.....
441 if (!aParamName
|| !*aParamName
) {
442 for (; *str
&& *str
!= ';' && !nsCRT::IsAsciiSpace(*str
); ++str
) {
445 if (str
== start
) return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY
;
447 *aResult
= (char*)moz_xmemdup(start
, (str
- start
) + 1);
448 (*aResult
)[str
- start
] = '\0'; // null-terminate
452 /* Skip forward to first ';' */
453 for (; *str
&& *str
!= ';' && *str
!= ','; ++str
) {
457 /* Skip over following whitespace */
458 for (; *str
&& nsCRT::IsAsciiSpace(*str
); ++str
) {
462 // Some broken http servers just specify parameters
463 // like 'filename' without specifying disposition
464 // method. Rewind to the first non-white-space
467 if (!*str
) str
= start
;
469 // RFC2231 - The legitimate parm format can be:
470 // A. title=ThisIsTitle
471 // B. title*=us-ascii'en-us'This%20is%20wierd.
472 // C. title*0*=us-ascii'en'This%20is%20wierd.%20We
473 // title*1*=have%20to%20support%20this.
475 // D. title*0="Hey, what you think you are doing?"
476 // title*1="There is no charset and lang info."
477 // RFC5987: only A and B
479 // collect results for the different algorithms (plain filename,
480 // RFC5987/2231-encoded filename, + continuations) separately and decide
481 // which to use at the end
482 char* caseAResult
= nullptr;
483 char* caseBResult
= nullptr;
484 char* caseCDResult
= nullptr;
486 // collect continuation segments
487 nsTArray
<Continuation
> segments
;
489 // our copies of the charset parameter, kept separately as they might
490 // differ for the two formats
491 nsDependentCSubstring charsetB
, charsetCD
;
493 nsDependentCSubstring lang
;
495 int32_t paramLen
= strlen(aParamName
);
500 const char* nameStart
= str
;
501 const char* nameEnd
= nullptr;
502 const char* valueStart
= nullptr;
503 const char* valueEnd
= nullptr;
504 bool isQuotedString
= false;
506 NS_ASSERTION(!nsCRT::IsAsciiSpace(*str
), "should be after whitespace.");
508 // Skip forward to the end of this token.
509 for (; *str
&& !nsCRT::IsAsciiSpace(*str
) && *str
!= '=' && *str
!= ';';
515 int32_t nameLen
= nameEnd
- nameStart
;
517 // Skip over whitespace, '=', and whitespace
518 while (nsCRT::IsAsciiSpace(*str
)) ++str
;
523 // don't accept parameters without "="
526 // Skip over '=' only if it was actually there
528 while (nsCRT::IsAsciiSpace(*str
)) ++str
;
531 // The value is a token, not a quoted string.
533 for (valueEnd
= str
; *valueEnd
&& *valueEnd
!= ';'; valueEnd
++) {
536 // ignore trailing whitespace:
537 while (valueEnd
> valueStart
&& nsCRT::IsAsciiSpace(*(valueEnd
- 1))) {
542 isQuotedString
= true;
546 for (valueEnd
= str
; *valueEnd
; ++valueEnd
) {
547 if (*valueEnd
== '\\' && *(valueEnd
+ 1)) {
549 } else if (*valueEnd
== '"') {
554 // *valueEnd != null means that *valueEnd is quote character.
555 if (*valueEnd
) str
++;
558 // See if this is the simplest case (case A above),
559 // a 'single' line value with no charset and lang.
560 // If so, copy it and return.
561 if (nameLen
== paramLen
&&
562 !nsCRT::strncasecmp(nameStart
, aParamName
, paramLen
)) {
564 // we already have one caseA result, ignore subsequent ones
568 // if the parameter spans across multiple lines we have to strip out the
569 // line continuation -- jht 4/29/98
570 nsAutoCString
tempStr(valueStart
, valueEnd
- valueStart
);
572 char* res
= ToNewCString(tempStr
, mozilla::fallible
);
573 NS_ENSURE_TRUE(res
, NS_ERROR_OUT_OF_MEMORY
);
575 if (isQuotedString
) RemoveQuotedStringEscapes(res
);
578 // keep going, we may find a RFC 2231/5987 encoded alternative
581 else if (nameLen
> paramLen
&&
582 !nsCRT::strncasecmp(nameStart
, aParamName
, paramLen
) &&
583 *(nameStart
+ paramLen
) == '*') {
585 const char* cp
= nameStart
+ paramLen
+ 1;
587 // if param name ends in "*" we need to do RFC5987 "ext-value" decoding
588 bool needExtDecoding
= *(nameEnd
- 1) == '*';
590 bool caseB
= nameLen
== paramLen
+ 1;
591 bool caseCStart
= (*cp
== '0') && needExtDecoding
;
593 // parse the segment number
594 int32_t segmentNumber
= -1;
596 int32_t segLen
= (nameEnd
- cp
) - (needExtDecoding
? 1 : 0);
597 segmentNumber
= parseSegmentNumber(cp
, segLen
);
599 if (segmentNumber
== -1) {
600 acceptContinuations
= false;
605 // CaseB and start of CaseC: requires charset and optional language
606 // in quotes (quotes required even if lang is blank)
607 if (caseB
|| (caseCStart
&& acceptContinuations
)) {
608 // look for single quotation mark(')
609 const char* sQuote1
= strchr(valueStart
, 0x27);
610 const char* sQuote2
= sQuote1
? strchr(sQuote1
+ 1, 0x27) : nullptr;
612 // Two single quotation marks must be present even in
613 // absence of charset and lang.
614 if (!sQuote1
|| !sQuote2
) {
616 "Mandatory two single quotes are missing in header parameter\n");
619 const char* charsetStart
= nullptr;
620 int32_t charsetLength
= 0;
621 const char* langStart
= nullptr;
622 int32_t langLength
= 0;
623 const char* rawValStart
= nullptr;
624 int32_t rawValLength
= 0;
626 if (sQuote2
&& sQuote1
) {
627 // both delimiters present: charSet'lang'rawVal
628 rawValStart
= sQuote2
+ 1;
629 rawValLength
= valueEnd
- rawValStart
;
631 langStart
= sQuote1
+ 1;
632 langLength
= sQuote2
- langStart
;
634 charsetStart
= valueStart
;
635 charsetLength
= sQuote1
- charsetStart
;
636 } else if (sQuote1
) {
637 // one delimiter; assume charset'rawVal
638 rawValStart
= sQuote1
+ 1;
639 rawValLength
= valueEnd
- rawValStart
;
641 charsetStart
= valueStart
;
642 charsetLength
= sQuote1
- valueStart
;
644 // no delimiter: just rawVal
645 rawValStart
= valueStart
;
646 rawValLength
= valueEnd
- valueStart
;
649 if (langLength
!= 0) {
650 lang
.Assign(langStart
, langLength
);
653 // keep the charset for later
655 charsetB
.Assign(charsetStart
, charsetLength
);
658 charsetCD
.Assign(charsetStart
, charsetLength
);
661 // non-empty value part
662 if (rawValLength
> 0) {
663 if (!caseBResult
&& caseB
) {
664 if (!IsValidPercentEscaped(rawValStart
, rawValLength
)) {
668 // allocate buffer for the raw value
669 char* tmpResult
= (char*)moz_xmemdup(rawValStart
, rawValLength
+ 1);
670 *(tmpResult
+ rawValLength
) = 0;
672 nsUnescape(tmpResult
);
673 caseBResult
= tmpResult
;
676 bool added
= addContinuation(segments
, 0, rawValStart
, rawValLength
,
677 needExtDecoding
, isQuotedString
);
680 // continuation not added, stop processing them
681 acceptContinuations
= false;
685 } // end of if-block : title*0*= or title*=
686 // caseD: a line of multiline param with no need for unescaping :
687 // title*[0-9]= or 2nd or later lines of a caseC param : title*[1-9]*=
688 else if (acceptContinuations
&& segmentNumber
!= -1) {
689 uint32_t valueLength
= valueEnd
- valueStart
;
692 addContinuation(segments
, segmentNumber
, valueStart
, valueLength
,
693 needExtDecoding
, isQuotedString
);
696 // continuation not added, stop processing them
697 acceptContinuations
= false;
699 } // end of if-block : title*[0-9]= or title*[1-9]*=
702 // str now points after the end of the value.
703 // skip over whitespace, ';', whitespace.
705 while (nsCRT::IsAsciiSpace(*str
)) ++str
;
709 // stop processing the header field; either we are done or the
710 // separator was missing
713 while (nsCRT::IsAsciiSpace(*str
)) ++str
;
716 caseCDResult
= combineContinuations(segments
);
718 if (caseBResult
&& !charsetB
.IsEmpty()) {
719 // check that the 2231/5987 result decodes properly given the
720 // specified character set
721 if (!IsValidOctetSequenceForCharset(charsetB
, caseBResult
)) {
722 caseBResult
= nullptr;
726 if (caseCDResult
&& !charsetCD
.IsEmpty()) {
727 // check that the 2231/5987 result decodes properly given the
728 // specified character set
729 if (!IsValidOctetSequenceForCharset(charsetCD
, caseCDResult
)) {
730 caseCDResult
= nullptr;
735 // prefer simple 5987 format over 2231 with continuations
736 *aResult
= caseBResult
;
737 caseBResult
= nullptr;
738 charset
.Assign(charsetB
);
739 } else if (caseCDResult
) {
740 // prefer 2231/5987 with or without continuations over plain format
741 *aResult
= caseCDResult
;
742 caseCDResult
= nullptr;
743 charset
.Assign(charsetCD
);
744 } else if (caseAResult
) {
745 *aResult
= caseAResult
;
746 caseAResult
= nullptr;
754 // if we have a result
756 // then return charset and lang as well
757 if (aLang
&& !lang
.IsEmpty()) {
758 uint32_t len
= lang
.Length();
759 *aLang
= (char*)moz_xmemdup(lang
.BeginReading(), len
+ 1);
762 if (aCharset
&& !charset
.IsEmpty()) {
763 uint32_t len
= charset
.Length();
764 *aCharset
= (char*)moz_xmemdup(charset
.BeginReading(), len
+ 1);
765 *(*aCharset
+ len
) = 0;
769 return *aResult
? NS_OK
: NS_ERROR_INVALID_ARG
;
772 nsresult
internalDecodeRFC2047Header(const char* aHeaderVal
,
773 const nsACString
& aDefaultCharset
,
774 bool aOverrideCharset
,
775 bool aEatContinuations
,
776 nsACString
& aResult
) {
778 if (!aHeaderVal
) return NS_ERROR_INVALID_ARG
;
779 if (!*aHeaderVal
) return NS_OK
;
781 // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string but
782 // aDefaultCharset is specified, decodes RFC 2047 encoding and converts
783 // to UTF-8. Otherwise, just strips away CRLF.
784 if (strstr(aHeaderVal
, "=?") ||
785 (!aDefaultCharset
.IsEmpty() &&
786 (!IsUtf8(nsDependentCString(aHeaderVal
)) ||
787 Is7bitNonAsciiString(aHeaderVal
, strlen(aHeaderVal
))))) {
788 DecodeRFC2047Str(aHeaderVal
, aDefaultCharset
, aOverrideCharset
, aResult
);
789 } else if (aEatContinuations
&&
790 (strchr(aHeaderVal
, '\n') || strchr(aHeaderVal
, '\r'))) {
791 aResult
= aHeaderVal
;
793 aEatContinuations
= false;
794 aResult
= aHeaderVal
;
797 if (aEatContinuations
) {
798 nsAutoCString
temp(aResult
);
799 temp
.ReplaceSubstring("\n\t", " ");
800 temp
.ReplaceSubstring("\r\t", " ");
809 nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal
,
810 const char* aDefaultCharset
,
811 bool aOverrideCharset
,
812 bool aEatContinuations
,
813 nsACString
& aResult
) {
814 return internalDecodeRFC2047Header(aHeaderVal
, nsCString(aDefaultCharset
),
815 aOverrideCharset
, aEatContinuations
,
// Returns true if aChar may appear unescaped in an RFC 5987 value,
// i.e. it is an "attr-char" (see RFC 5987, Section 3.2.1):
// ASCII letters, digits, and one of ! # $ & + - . ^ _ ` | ~
bool IsRFC5987AttrChar(char aChar) {
  // Work on the unsigned value so the range checks behave the same whether
  // plain char is signed or unsigned on the target platform.
  const unsigned char c = static_cast<unsigned char>(aChar);

  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
         (c >= '0' && c <= '9') ||
         (c == '!' || c == '#' || c == '$' || c == '&' || c == '+' ||
          c == '-' || c == '.' || c == '^' || c == '_' || c == '`' ||
          c == '|' || c == '~');
}
831 // percent-decode a value
832 // returns false on failure
833 bool PercentDecode(nsACString
& aValue
) {
834 char* c
= (char*)moz_xmalloc(aValue
.Length() + 1);
836 strcpy(c
, PromiseFlatCString(aValue
).get());
844 // Decode a parameter value using the encoding defined in RFC 5987
846 // charset "'" [ language ] "'" value-chars
848 nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString
& aParamVal
,
850 nsAString
& aResult
) {
851 nsAutoCString charset
;
852 nsAutoCString language
;
855 uint32_t delimiters
= 0;
856 const nsCString
& encoded
= PromiseFlatCString(aParamVal
);
857 const char* c
= encoded
.get();
865 } else if (((unsigned char)tc
) >= 128) {
866 // fail early, not ASCII
867 NS_WARNING("non-US-ASCII character in RFC5987-encoded param");
868 return NS_ERROR_INVALID_ARG
;
870 if (delimiters
== 0) {
871 // valid characters are checked later implicitly
873 } else if (delimiters
== 1) {
874 // no value checking for now
876 } else if (delimiters
== 2) {
877 if (IsRFC5987AttrChar(tc
)) {
879 } else if (tc
== '%') {
880 if (!IsHexDigit(c
[0]) || !IsHexDigit(c
[1])) {
881 // we expect two more characters
882 NS_WARNING("broken %-escape in RFC5987-encoded param");
883 return NS_ERROR_INVALID_ARG
;
886 // we consume two more
890 // character not allowed here
891 NS_WARNING("invalid character in RFC5987-encoded param");
892 return NS_ERROR_INVALID_ARG
;
898 if (delimiters
!= 2) {
899 NS_WARNING("missing delimiters in RFC5987-encoded param");
900 return NS_ERROR_INVALID_ARG
;
903 // abort early for unsupported encodings
904 if (!charset
.LowerCaseEqualsLiteral("utf-8")) {
905 NS_WARNING("unsupported charset in RFC5987-encoded param");
906 return NS_ERROR_INVALID_ARG
;
910 if (!PercentDecode(value
)) {
911 return NS_ERROR_OUT_OF_MEMORY
;
914 // return the encoding
915 aLang
.Assign(language
);
917 // finally convert octet sequence to UTF-8 and be done
919 nsresult rv
= ConvertStringToUTF8(value
, charset
, true, false, utf8
);
920 NS_ENSURE_SUCCESS(rv
, rv
);
922 CopyUTF8toUTF16(utf8
, aResult
);
926 nsresult
internalDecodeParameter(const nsACString
& aParamValue
,
927 const nsACString
& aCharset
,
928 const nsACString
& aDefaultCharset
,
929 bool aOverrideCharset
, bool aDecode2047
,
930 nsACString
& aResult
) {
932 // If aCharset is given, aParamValue was obtained from RFC2231/5987
933 // encoding and we're pretty sure that it's in aCharset.
934 if (!aCharset
.IsEmpty()) {
935 return ConvertStringToUTF8(aParamValue
, aCharset
, true, true, aResult
);
938 const nsCString
& param
= PromiseFlatCString(aParamValue
);
939 nsAutoCString unQuoted
;
940 nsACString::const_iterator s
, e
;
941 param
.BeginReading(s
);
944 // strip '\' when used to quote CR, LF, '"' and '\'
945 for (; s
!= e
; ++s
) {
948 --s
; // '\' is at the end. move back and append '\'.
949 } else if (*s
!= nsCRT::CR
&& *s
!= nsCRT::LF
&& *s
!= '"' &&
951 --s
; // '\' is not foll. by CR,LF,'"','\'. move back and append '\'
953 // else : skip '\' and append the quoted character.
962 nsAutoCString decoded
;
964 // Try RFC 2047 encoding, instead.
965 rv
= internalDecodeRFC2047Header(unQuoted
.get(), aDefaultCharset
,
966 aOverrideCharset
, true, decoded
);
968 if (NS_SUCCEEDED(rv
) && !decoded
.IsEmpty()) aResult
= decoded
;
975 nsMIMEHeaderParamImpl::DecodeParameter(const nsACString
& aParamValue
,
976 const char* aCharset
,
977 const char* aDefaultCharset
,
978 bool aOverrideCharset
,
979 nsACString
& aResult
) {
980 return internalDecodeParameter(aParamValue
, nsCString(aCharset
),
981 nsCString(aDefaultCharset
), aOverrideCharset
,
985 #define ISHEXCHAR(c) \
986 ((0x30 <= uint8_t(c) && uint8_t(c) <= 0x39) || \
987 (0x41 <= uint8_t(c) && uint8_t(c) <= 0x46) || \
988 (0x61 <= uint8_t(c) && uint8_t(c) <= 0x66))
990 // Decode Q encoding (RFC 2047).
992 char* DecodeQ(const char* in
, uint32_t length
) {
993 char *out
, *dest
= nullptr;
995 out
= dest
= (char*)calloc(length
+ 1, sizeof(char));
996 if (dest
== nullptr) return nullptr;
1001 // check if |in| in the form of '=hh' where h is [0-9a-fA-F].
1002 if (length
< 3 || !ISHEXCHAR(in
[1]) || !ISHEXCHAR(in
[2])) {
1005 PR_sscanf(in
+ 1, "%2X", &c
);
1018 if (*in
& 0x80) goto badsyntax
;
1025 for (out
= dest
; *out
; ++out
) {
1026 if (*out
== '\t') *out
= ' ';
1036 // check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842)
1037 // or has ESC which may be an indication that it's in one of many ISO
1038 // 2022 7bit encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554).
1040 bool Is7bitNonAsciiString(const char* input
, uint32_t len
) {
1044 hz_initial
, // No HZ seen yet
1045 hz_escaped
, // Inside an HZ ~{ escape sequence
1046 hz_seen
, // Have seen at least one complete HZ sequence
1047 hz_notpresent
// Have seen something that is not legal HZ
1050 hz_state
= hz_initial
;
1052 c
= uint8_t(*input
++);
1054 if (c
& 0x80) return false;
1055 if (c
== 0x1B) return true;
1060 if (*input
== '{') {
1061 hz_state
= hz_escaped
;
1062 } else if (*input
== '~') {
1063 // ~~ is the HZ encoding of ~. Skip over second ~ as well
1068 hz_state
= hz_notpresent
;
1073 if (*input
== '}') hz_state
= hz_seen
;
1080 return hz_state
== hz_seen
;
1083 #define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD)
1085 // copy 'raw' sequences of octets in aInput to aOutput.
1086 // If aDefaultCharset is specified, the input is assumed to be in the
1087 // charset and converted to UTF-8. Otherwise, a blind copy is made.
1088 // If aDefaultCharset is specified, but the conversion to UTF-8
1089 // is not successful, each octet is replaced by Unicode replacement
1090 // chars. *aOutput is advanced by the number of output octets.
1092 void CopyRawHeader(const char* aInput
, uint32_t aLen
,
1093 const nsACString
& aDefaultCharset
, nsACString
& aOutput
) {
1096 // If aDefaultCharset is not specified, make a blind copy.
1097 if (aDefaultCharset
.IsEmpty()) {
1098 aOutput
.Append(aInput
, aLen
);
1102 // Copy as long as it's US-ASCII. An ESC may indicate ISO 2022
1103 // A ~ may indicate it is HZ
1104 while (aLen
&& (c
= uint8_t(*aInput
++)) != 0x1B && c
!= '~' && !(c
& 0x80)) {
1105 aOutput
.Append(char(c
));
1113 // skip ASCIIness/UTF8ness test if aInput is suspected to be a 7bit non-ascii
1114 // string and aDefaultCharset is a 7bit non-ascii charset.
1116 (c
== 0x1B || c
== '~') &&
1117 IS_7BIT_NON_ASCII_CHARSET(PromiseFlatCString(aDefaultCharset
).get());
1119 // If not UTF-8, treat as default charset
1120 nsAutoCString utf8Text
;
1121 if (NS_SUCCEEDED(ConvertStringToUTF8(Substring(aInput
, aInput
+ aLen
),
1122 PromiseFlatCString(aDefaultCharset
),
1123 skipCheck
, true, utf8Text
))) {
1124 aOutput
.Append(utf8Text
);
1125 } else { // replace each octet with Unicode replacement char in UTF-8.
1126 for (uint32_t i
= 0; i
< aLen
; i
++) {
1127 c
= uint8_t(*aInput
++);
1129 aOutput
.Append(REPLACEMENT_CHAR
);
1131 aOutput
.Append(char(c
));
1137 nsresult
DecodeQOrBase64Str(const char* aEncoded
, size_t aLen
, char aQOrBase64
,
1138 const nsACString
& aCharset
, nsACString
& aResult
) {
1140 bool b64alloc
= false;
1141 NS_ASSERTION(aQOrBase64
== 'Q' || aQOrBase64
== 'B', "Should be 'Q' or 'B'");
1142 if (aQOrBase64
== 'Q') {
1143 decodedText
= DecodeQ(aEncoded
, aLen
);
1144 } else if (aQOrBase64
== 'B') {
1145 decodedText
= PL_Base64Decode(aEncoded
, aLen
, nullptr);
1148 return NS_ERROR_INVALID_ARG
;
1152 return NS_ERROR_INVALID_ARG
;
1155 nsAutoCString utf8Text
;
1156 // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
1157 nsresult rv
= ConvertStringToUTF8(
1158 nsDependentCString(decodedText
), aCharset
,
1159 IS_7BIT_NON_ASCII_CHARSET(PromiseFlatCString(aCharset
).get()), true,
1162 PR_Free(decodedText
);
1166 if (NS_FAILED(rv
)) {
1169 aResult
.Append(utf8Text
);
1174 static const char especials
[] = R
"(()<>@,;:\"/[]?.=)";
1176 // |decode_mime_part2_str| taken from comi18n.c
1177 // Decode RFC2047-encoded words in the input and convert the result to UTF-8.
1178 // If aOverrideCharset is true, charset in RFC2047-encoded words is
1179 // ignored and aDefaultCharset is assumed, instead. aDefaultCharset
1180 // is also used to convert raw octets (without RFC 2047 encoding) to UTF-8.
1182 nsresult DecodeRFC2047Str(const char* aHeader,
1183 const nsACString& aDefaultCharset,
1184 bool aOverrideCharset, nsACString& aResult) {
1185 const char *p, *q = nullptr, *r;
1186 const char* begin; // tracking pointer for where we are in the input buffer
1187 int32_t isLastEncodedWord = 0;
1188 const char *charsetStart, *charsetEnd;
1189 nsAutoCString prevCharset, curCharset;
1190 nsAutoCString encodedText;
1191 char prevEncoding = '\0', curEncoding;
1196 // To avoid buffer realloc, if possible, set capacity in advance. No
1197 // matter what, more than 3x expansion can never happen for all charsets
1198 // supported by Mozilla. SCSU/BCSU with the sliding window set to a
1199 // non-BMP block may be exceptions, but Mozilla does not support them.
1200 // Nor does any known mail/news program use them. Even if one did, we're
1201 // safe because we don't use a raw *char any more.
1202 aResult.SetCapacity(3 * strlen(aHeader));
1204 while ((p = strstr(begin, "=?")) != nullptr) {
1205 if (isLastEncodedWord) {
1206 // See if it's all whitespace.
1207 for (q = begin; q < p; ++q) {
1208 if (!strchr(" \t\r\n", *q)) {
1214 if (!isLastEncodedWord || q < p) {
1215 if (!encodedText.IsEmpty()) {
1216 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
1217 prevEncoding, prevCharset, aResult);
1218 if (NS_FAILED(rv)) {
1219 aResult.Append(encodedText);
1221 encodedText.Truncate();
1222 prevCharset.Truncate();
1223 prevEncoding = '\0';
1225 // copy the part before the encoded-word
1226 CopyRawHeader(begin, p - begin, aDefaultCharset, aResult);
1234 charsetEnd = nullptr;
1235 for (q = p; *q != '?'; q++) {
1236 if (*q <= ' ' || strchr(especials, *q)) {
1240 // RFC 2231 section 5
1241 if (!charsetEnd && *q == '*') {
1250 curEncoding = nsCRT::ToUpper(*q);
1251 if (curEncoding != 'Q' && curEncoding != 'B') goto badsyntax;
1253 if (q[1] != '?') goto badsyntax;
1255 // loop-wise, keep going until we hit "?=". the inner check handles the
1256 // nul terminator should the string terminate before we hit the right
1257 // marker. (And the r[1] will never reach beyond the end of the string
1258 // because *r != '?' is true if r is the nul character.)
1259 for (r = q + 2; *r != '?' || r[1] != '='; r++) {
1260 if (*r < ' ') goto badsyntax;
1265 isLastEncodedWord = 1;
1269 curCharset.Assign(charsetStart, charsetEnd - charsetStart);
1270 // Override charset if requested. Never override labeled UTF-8.
1271 // Use default charset instead of UNKNOWN-8BIT
1272 if ((aOverrideCharset &&
1273 0 != nsCRT::strcasecmp(curCharset.get(), "UTF
-8")) ||
1274 (!aDefaultCharset.IsEmpty() &&
1275 0 == nsCRT::strcasecmp(curCharset.get(), "UNKNOWN
-8BIT
"))) {
1276 curCharset = aDefaultCharset;
1281 if (curEncoding == 'B') {
1282 // bug 227290. ignore an extraneous '=' at the end.
1283 // (# of characters in B-encoded part has to be a multiple of 4)
1284 int32_t n = r - (q + 2);
1285 R -= (n % 4 == 1 && !strncmp(r - 3, "===", 3)) ? 1 : 0;
1287 // Bug 493544. Don't decode the encoded text until it ends
1289 (prevCharset.IsEmpty() ||
1290 (curCharset == prevCharset && curEncoding == prevEncoding))) {
1291 encodedText.Append(q + 2, R - (q + 2));
1292 prevCharset = curCharset;
1293 prevEncoding = curEncoding;
1296 isLastEncodedWord = 1;
1300 bool bDecoded; // If the current line has been decoded.
1302 if (!encodedText.IsEmpty()) {
1303 if (curCharset == prevCharset && curEncoding == prevEncoding) {
1304 encodedText.Append(q + 2, R - (q + 2));
1307 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
1308 prevEncoding, prevCharset, aResult);
1309 if (NS_FAILED(rv)) {
1310 aResult.Append(encodedText);
1312 encodedText.Truncate();
1313 prevCharset.Truncate();
1314 prevEncoding = '\0';
1317 rv = DecodeQOrBase64Str(q + 2, R - (q + 2), curEncoding, curCharset,
1319 if (NS_FAILED(rv)) {
1320 aResult.Append(encodedText);
1325 isLastEncodedWord = 1;
1329 if (!encodedText.IsEmpty()) {
1330 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
1331 prevEncoding, prevCharset, aResult);
1332 if (NS_FAILED(rv)) {
1333 aResult.Append(encodedText);
1335 encodedText.Truncate();
1336 prevCharset.Truncate();
1338 // copy the part before the encoded-word
1339 aResult.Append(begin, p - begin);
1341 isLastEncodedWord = 0;
1344 if (!encodedText.IsEmpty()) {
1345 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
1346 prevEncoding, prevCharset, aResult);
1347 if (NS_FAILED(rv)) {
1348 aResult.Append(encodedText);
1352 // put the tail back
1353 CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult);
1355 nsAutoCString tempStr(aResult);
1356 tempStr.ReplaceChar('\t', ' ');