1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set sw=4 ts=8 et tw=80 : */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
17 #include "nsIUTF8ConverterService.h"
18 #include "nsUConvCID.h"
19 #include "nsIServiceManager.h"
20 #include "nsMIMEHeaderParamImpl.h"
21 #include "nsReadableUtils.h"
22 #include "nsNativeCharsetUtils.h"
24 #include "nsIUnicodeDecoder.h"
25 #include "mozilla/dom/EncodingUtils.h"
27 using mozilla::dom::EncodingUtils
;
29 // static functions declared below are moved from mailnews/mime/src/comi18n.cpp
31 static char *DecodeQ(const char *, uint32_t);
32 static bool Is7bitNonAsciiString(const char *, uint32_t);
33 static void CopyRawHeader(const char *, uint32_t, const char *, nsACString
&);
34 static nsresult
DecodeRFC2047Str(const char *, const char *, bool, nsACString
&);
35 static nsresult
internalDecodeParameter(const nsACString
&, const char*,
36 const char*, bool, bool, nsACString
&);
38 // XXX The chance of UTF-7 being used in the message header is really
39 // low, but in theory it's possible.
40 #define IS_7BIT_NON_ASCII_CHARSET(cset) \
41 (!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \
42 !nsCRT::strncasecmp((cset), "HZ-GB", 5) || \
43 !nsCRT::strncasecmp((cset), "UTF-7", 5))
45 NS_IMPL_ISUPPORTS1(nsMIMEHeaderParamImpl
, nsIMIMEHeaderParam
)
48 nsMIMEHeaderParamImpl::GetParameter(const nsACString
& aHeaderVal
,
49 const char *aParamName
,
50 const nsACString
& aFallbackCharset
,
51 bool aTryLocaleCharset
,
52 char **aLang
, nsAString
& aResult
)
54 return DoGetParameter(aHeaderVal
, aParamName
, MIME_FIELD_ENCODING
,
55 aFallbackCharset
, aTryLocaleCharset
, aLang
, aResult
);
59 nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString
& aHeaderVal
,
60 const char *aParamName
,
61 const nsACString
& aFallbackCharset
,
62 bool aTryLocaleCharset
,
63 char **aLang
, nsAString
& aResult
)
65 return DoGetParameter(aHeaderVal
, aParamName
, HTTP_FIELD_ENCODING
,
66 aFallbackCharset
, aTryLocaleCharset
, aLang
, aResult
);
69 // XXX : aTryLocaleCharset is not yet effective.
71 nsMIMEHeaderParamImpl::DoGetParameter(const nsACString
& aHeaderVal
,
72 const char *aParamName
,
73 ParamDecoding aDecoding
,
74 const nsACString
& aFallbackCharset
,
75 bool aTryLocaleCharset
,
76 char **aLang
, nsAString
& aResult
)
81 // get parameter (decode RFC 2231/5987 when applicable, as specified by
82 // aDecoding (5987 being a subset of 2231) and return charset.)
84 nsXPIDLCString charset
;
85 rv
= DoParameterInternal(PromiseFlatCString(aHeaderVal
).get(), aParamName
,
86 aDecoding
, getter_Copies(charset
), aLang
,
91 // convert to UTF-8 after charset conversion and RFC 2047 decoding
95 rv
= internalDecodeParameter(med
, charset
.get(), nullptr, false,
96 // was aDecoding == MIME_FIELD_ENCODING
100 NS_ENSURE_SUCCESS(rv
, rv
);
102 if (!aFallbackCharset
.IsEmpty())
104 nsAutoCString charset
;
105 EncodingUtils::FindEncodingForLabel(aFallbackCharset
, charset
);
107 nsCOMPtr
<nsIUTF8ConverterService
>
108 cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID
));
110 NS_SUCCEEDED(cvtUTF8
->ConvertStringToUTF8(str1
,
111 PromiseFlatCString(aFallbackCharset
).get(), false,
112 !charset
.EqualsLiteral("UTF-8"),
114 CopyUTF8toUTF16(str2
, aResult
);
120 CopyUTF8toUTF16(str1
, aResult
);
124 if (aTryLocaleCharset
&& !NS_IsNativeUTF8())
125 return NS_CopyNativeToUnicode(str1
, aResult
);
127 CopyASCIItoUTF16(str1
, aResult
);
// Remove backslash-encoded sequences (quoted-pairs) from a quoted-string.
//
// The NUL-terminated string |src| is modified in place and may get shorter:
// every backslash that is followed by another character is dropped and the
// character after it is kept verbatim. A lone backslash at the very end of
// the string has nothing to escape and is kept as-is. A null |src| is a no-op.
void RemoveQuotedStringEscapes(char *src)
{
  if (!src) {
    return;
  }

  // |dst| is the write cursor; it never runs ahead of the read cursor |c|,
  // so compacting in place is safe.
  char *dst = src;

  for (char *c = src; *c; ++c) {
    if (c[0] == '\\' && c[1]) {
      // skip backslash if not at end
      ++c;
    }
    *dst++ = *c;
  }

  // re-terminate at the (possibly shorter) new end
  *dst = '\0';
}
// Returns true if aChar is a hexadecimal digit, i.e. one of '0'-'9',
// 'a'-'f' or 'A'-'F'. The test compares against these literal ranges only,
// so no locale-dependent classification is involved.
bool IsHexDigit(char aChar)
{
  return (aChar >= '0' && aChar <= '9') ||
         (aChar >= 'a' && aChar <= 'f') ||
         (aChar >= 'A' && aChar <= 'F');
}
159 // validate that a C String containing %-escapes is syntactically valid
160 bool IsValidPercentEscaped(const char *aValue
, int32_t len
)
162 for (int32_t i
= 0; i
< len
; i
++) {
163 if (aValue
[i
] == '%') {
164 if (!IsHexDigit(aValue
[i
+ 1]) || !IsHexDigit(aValue
[i
+ 2])) {
172 // Support for continuations (RFC 2231, Section 3)
174 // only a sane number supported
175 #define MAX_CONTINUATIONS 999
177 // part of a continuation
181 Continuation(const char *aValue
, uint32_t aLength
,
182 bool aNeedsPercentDecoding
, bool aWasQuotedString
) {
185 needsPercentDecoding
= aNeedsPercentDecoding
;
186 wasQuotedString
= aWasQuotedString
;
189 // empty constructor needed for nsTArray
192 needsPercentDecoding
= false;
193 wasQuotedString
= false;
199 bool needsPercentDecoding
;
200 bool wasQuotedString
;
203 // combine segments into a single string, returning the allocated string
204 // (or nullptr) while emptying the list
205 char *combineContinuations(nsTArray
<Continuation
>& aArray
)
208 if (aArray
.Length() == 0)
211 // Get an upper bound for the length
213 for (uint32_t i
= 0; i
< aArray
.Length(); i
++) {
214 length
+= aArray
[i
].length
;
218 char *result
= (char *) nsMemory::Alloc(length
+ 1);
224 for (uint32_t i
= 0; i
< aArray
.Length(); i
++) {
225 Continuation cont
= aArray
[i
];
226 if (! cont
.value
) break;
228 char *c
= result
+ strlen(result
);
229 strncat(result
, cont
.value
, cont
.length
);
230 if (cont
.needsPercentDecoding
) {
233 if (cont
.wasQuotedString
) {
234 RemoveQuotedStringEscapes(c
);
238 // return null if empty value
239 if (*result
== '\0') {
240 nsMemory::Free(result
);
245 NS_WARNING("Out of memory\n");
251 // add a continuation, return false on error if segment already has been seen
252 bool addContinuation(nsTArray
<Continuation
>& aArray
, uint32_t aIndex
,
253 const char *aValue
, uint32_t aLength
,
254 bool aNeedsPercentDecoding
, bool aWasQuotedString
)
256 if (aIndex
< aArray
.Length() && aArray
[aIndex
].value
) {
257 NS_WARNING("duplicate RC2231 continuation segment #\n");
261 if (aIndex
> MAX_CONTINUATIONS
) {
262 NS_WARNING("RC2231 continuation segment # exceeds limit\n");
266 if (aNeedsPercentDecoding
&& aWasQuotedString
) {
267 NS_WARNING("RC2231 continuation segment can't use percent encoding and quoted string form at the same time\n");
271 Continuation
cont(aValue
, aLength
, aNeedsPercentDecoding
, aWasQuotedString
);
273 if (aArray
.Length() <= aIndex
) {
274 aArray
.SetLength(aIndex
+ 1);
276 aArray
[aIndex
] = cont
;
281 // parse a segment number; return -1 on error
282 int32_t parseSegmentNumber(const char *aValue
, int32_t aLen
)
285 NS_WARNING("segment number missing\n");
289 if (aLen
> 1 && aValue
[0] == '0') {
290 NS_WARNING("leading '0' not allowed in segment number\n");
294 int32_t segmentNumber
= 0;
296 for (int32_t i
= 0; i
< aLen
; i
++) {
297 if (! (aValue
[i
] >= '0' && aValue
[i
] <= '9')) {
298 NS_WARNING("invalid characters in segment number\n");
303 segmentNumber
+= aValue
[i
] - '0';
304 if (segmentNumber
> MAX_CONTINUATIONS
) {
305 NS_WARNING("Segment number exceeds sane size\n");
310 return segmentNumber
;
313 // validate a given octet sequence for compliance with the specified charset
315 bool IsValidOctetSequenceForCharset(nsACString
& aCharset
, const char *aOctets
)
317 nsCOMPtr
<nsIUTF8ConverterService
> cvtUTF8(do_GetService
318 (NS_UTF8CONVERTERSERVICE_CONTRACTID
));
320 NS_WARNING("Can't get UTF8ConverterService\n");
324 nsAutoCString tmpRaw
;
325 tmpRaw
.Assign(aOctets
);
326 nsAutoCString tmpDecoded
;
328 nsresult rv
= cvtUTF8
->ConvertStringToUTF8(tmpRaw
,
329 PromiseFlatCString(aCharset
).get(),
330 false, false, 1, tmpDecoded
);
333 // we can't decode; charset may be unsupported, or the octet sequence
334 // is broken (illegal or incomplete octet sequence contained)
335 NS_WARNING("RFC2231/5987 parameter value does not decode according to specified charset\n");
342 // moved almost verbatim from mimehdrs.cpp
344 // MimeHeaders_get_parameter (const char *header_value, const char *parm_name,
345 // char **charset, char **language)
347 // The format of these header lines is
348 // <token> [ ';' <token> '=' <token-or-quoted-string> ]*
350 nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue
,
351 const char *aParamName
,
356 return DoParameterInternal(aHeaderValue
, aParamName
, MIME_FIELD_ENCODING
,
357 aCharset
, aLang
, aResult
);
362 nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue
,
363 const char *aParamName
,
364 ParamDecoding aDecoding
,
370 if (!aHeaderValue
|| !*aHeaderValue
|| !aResult
)
371 return NS_ERROR_INVALID_ARG
;
375 if (aCharset
) *aCharset
= nullptr;
376 if (aLang
) *aLang
= nullptr;
378 nsAutoCString charset
;
380 // change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable
381 // them for HTTP header fields later on, see bug 776324
382 bool acceptContinuations
= true;
384 const char *str
= aHeaderValue
;
386 // skip leading white space.
387 for (; *str
&& nsCRT::IsAsciiSpace(*str
); ++str
)
389 const char *start
= str
;
391 // aParamName is empty. return the first (possibly) _unnamed_ 'parameter'
392 // For instance, return 'inline' in the following case:
393 // Content-Disposition: inline; filename=.....
394 if (!aParamName
|| !*aParamName
)
396 for (; *str
&& *str
!= ';' && !nsCRT::IsAsciiSpace(*str
); ++str
)
399 return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY
;
401 *aResult
= (char *) nsMemory::Clone(start
, (str
- start
) + 1);
402 NS_ENSURE_TRUE(*aResult
, NS_ERROR_OUT_OF_MEMORY
);
403 (*aResult
)[str
- start
] = '\0'; // null-terminate
407 /* Skip forward to first ';' */
408 for (; *str
&& *str
!= ';' && *str
!= ','; ++str
)
412 /* Skip over following whitespace */
413 for (; *str
&& nsCRT::IsAsciiSpace(*str
); ++str
)
416 // Some broken http servers just specify parameters
417 // like 'filename' without specifying disposition
418 // method. Rewind to the first non-white-space
424 // RFC2231 - The legitimate parm format can be:
425 // A. title=ThisIsTitle
426 // B. title*=us-ascii'en-us'This%20is%20wierd.
427 // C. title*0*=us-ascii'en'This%20is%20wierd.%20We
428 // title*1*=have%20to%20support%20this.
430 // D. title*0="Hey, what you think you are doing?"
431 // title*1="There is no charset and lang info."
432 // RFC5987: only A and B
434 // collect results for the different algorithms (plain filename,
435 // RFC5987/2231-encoded filename, + continuations) separately and decide
436 // which to use at the end
437 char *caseAResult
= nullptr;
438 char *caseBResult
= nullptr;
439 char *caseCDResult
= nullptr;
441 // collect continuation segments
442 nsTArray
<Continuation
> segments
;
445 // our copies of the charset parameter, kept separately as they might
446 // differ for the two formats
447 nsDependentCSubstring charsetB
, charsetCD
;
449 nsDependentCSubstring lang
;
451 int32_t paramLen
= strlen(aParamName
);
456 const char *nameStart
= str
;
457 const char *nameEnd
= nullptr;
458 const char *valueStart
= str
;
459 const char *valueEnd
= nullptr;
460 bool isQuotedString
= false;
462 NS_ASSERTION(!nsCRT::IsAsciiSpace(*str
), "should be after whitespace.");
464 // Skip forward to the end of this token.
465 for (; *str
&& !nsCRT::IsAsciiSpace(*str
) && *str
!= '=' && *str
!= ';'; str
++)
469 int32_t nameLen
= nameEnd
- nameStart
;
471 // Skip over whitespace, '=', and whitespace
472 while (nsCRT::IsAsciiSpace(*str
)) ++str
;
477 // don't accept parameters without "="
480 while (nsCRT::IsAsciiSpace(*str
)) ++str
;
483 // The value is a token, not a quoted string.
486 *valueEnd
&& !nsCRT::IsAsciiSpace (*valueEnd
) && *valueEnd
!= ';';
491 isQuotedString
= true;
495 for (valueEnd
= str
; *valueEnd
; ++valueEnd
) {
496 if (*valueEnd
== '\\' && *(valueEnd
+ 1))
498 else if (*valueEnd
== '"')
502 // *valueEnd != null means that *valueEnd is quote character.
507 // See if this is the simplest case (case A above),
508 // a 'single' line value with no charset and lang.
509 // If so, copy it and return.
510 if (nameLen
== paramLen
&&
511 !nsCRT::strncasecmp(nameStart
, aParamName
, paramLen
)) {
514 // we already have one caseA result, ignore subsequent ones
518 // if the parameter spans across multiple lines we have to strip out the
519 // line continuation -- jht 4/29/98
520 nsAutoCString
tempStr(valueStart
, valueEnd
- valueStart
);
521 tempStr
.StripChars("\r\n");
522 char *res
= ToNewCString(tempStr
);
523 NS_ENSURE_TRUE(res
, NS_ERROR_OUT_OF_MEMORY
);
526 RemoveQuotedStringEscapes(res
);
529 // keep going, we may find a RFC 2231/5987 encoded alternative
532 else if (nameLen
> paramLen
&&
533 !nsCRT::strncasecmp(nameStart
, aParamName
, paramLen
) &&
534 *(nameStart
+ paramLen
) == '*') {
537 const char *cp
= nameStart
+ paramLen
+ 1;
539 // if param name ends in "*" we need to do RFC5987 "ext-value" decoding
540 bool needExtDecoding
= *(nameEnd
- 1) == '*';
542 bool caseB
= nameLen
== paramLen
+ 1;
543 bool caseCStart
= (*cp
== '0') && needExtDecoding
;
545 // parse the segment number
546 int32_t segmentNumber
= -1;
548 int32_t segLen
= (nameEnd
- cp
) - (needExtDecoding
? 1 : 0);
549 segmentNumber
= parseSegmentNumber(cp
, segLen
);
551 if (segmentNumber
== -1) {
552 acceptContinuations
= false;
557 // CaseB and start of CaseC: requires charset and optional language
558 // in quotes (quotes required even if lang is blank)
559 if (caseB
|| (caseCStart
&& acceptContinuations
)) {
560 // look for single quotation mark(')
561 const char *sQuote1
= PL_strchr(valueStart
, 0x27);
562 const char *sQuote2
= sQuote1
? PL_strchr(sQuote1
+ 1, 0x27) : nullptr;
564 // Two single quotation marks must be present even in
565 // absence of charset and lang.
566 if (!sQuote1
|| !sQuote2
) {
567 NS_WARNING("Mandatory two single quotes are missing in header parameter\n");
570 const char *charsetStart
= nullptr;
571 int32_t charsetLength
= 0;
572 const char *langStart
= nullptr;
573 int32_t langLength
= 0;
574 const char *rawValStart
= nullptr;
575 int32_t rawValLength
= 0;
577 if (sQuote2
&& sQuote1
) {
578 // both delimiters present: charSet'lang'rawVal
579 rawValStart
= sQuote2
+ 1;
580 rawValLength
= valueEnd
- rawValStart
;
582 langStart
= sQuote1
+ 1;
583 langLength
= sQuote2
- langStart
;
585 charsetStart
= valueStart
;
586 charsetLength
= sQuote1
- charsetStart
;
589 // one delimiter; assume charset'rawVal
590 rawValStart
= sQuote1
+ 1;
591 rawValLength
= valueEnd
- rawValStart
;
593 charsetStart
= valueStart
;
594 charsetLength
= sQuote1
- valueStart
;
597 // no delimiter: just rawVal
598 rawValStart
= valueStart
;
599 rawValLength
= valueEnd
- valueStart
;
602 if (langLength
!= 0) {
603 lang
.Assign(langStart
, langLength
);
606 // keep the charset for later
608 charsetB
.Assign(charsetStart
, charsetLength
);
611 charsetCD
.Assign(charsetStart
, charsetLength
);
614 // non-empty value part
615 if (rawValLength
> 0) {
616 if (!caseBResult
&& caseB
) {
617 if (!IsValidPercentEscaped(rawValStart
, rawValLength
)) {
621 // allocate buffer for the raw value
622 char *tmpResult
= (char *) nsMemory::Clone(rawValStart
, rawValLength
+ 1);
626 *(tmpResult
+ rawValLength
) = 0;
628 nsUnescape(tmpResult
);
629 caseBResult
= tmpResult
;
632 bool added
= addContinuation(segments
, 0, rawValStart
,
633 rawValLength
, needExtDecoding
,
637 // continuation not added, stop processing them
638 acceptContinuations
= false;
642 } // end of if-block : title*0*= or title*=
643 // caseD: a line of multiline param with no need for unescaping : title*[0-9]=
644 // or 2nd or later lines of a caseC param : title*[1-9]*=
645 else if (acceptContinuations
&& segmentNumber
!= -1) {
646 uint32_t valueLength
= valueEnd
- valueStart
;
648 bool added
= addContinuation(segments
, segmentNumber
, valueStart
,
649 valueLength
, needExtDecoding
,
653 // continuation not added, stop processing them
654 acceptContinuations
= false;
656 } // end of if-block : title*[0-9]= or title*[1-9]*=
659 // str now points after the end of the value.
660 // skip over whitespace, ';', whitespace.
662 while (nsCRT::IsAsciiSpace(*str
)) ++str
;
666 // stop processing the header field; either we are done or the
667 // separator was missing
670 while (nsCRT::IsAsciiSpace(*str
)) ++str
;
673 caseCDResult
= combineContinuations(segments
);
675 if (caseBResult
&& !charsetB
.IsEmpty()) {
676 // check that the 2231/5987 result decodes properly given the
677 // specified character set
678 if (!IsValidOctetSequenceForCharset(charsetB
, caseBResult
))
679 caseBResult
= nullptr;
682 if (caseCDResult
&& !charsetCD
.IsEmpty()) {
683 // check that the 2231/5987 result decodes properly given the
684 // specified character set
685 if (!IsValidOctetSequenceForCharset(charsetCD
, caseCDResult
))
686 caseCDResult
= nullptr;
690 // prefer simple 5987 format over 2231 with continuations
691 *aResult
= caseBResult
;
692 caseBResult
= nullptr;
693 charset
.Assign(charsetB
);
695 else if (caseCDResult
) {
696 // prefer 2231/5987 with or without continuations over plain format
697 *aResult
= caseCDResult
;
698 caseCDResult
= nullptr;
699 charset
.Assign(charsetCD
);
701 else if (caseAResult
) {
702 *aResult
= caseAResult
;
703 caseAResult
= nullptr;
707 nsMemory::Free(caseAResult
);
708 nsMemory::Free(caseBResult
);
709 nsMemory::Free(caseCDResult
);
711 // if we have a result
713 // then return charset and lang as well
714 if (aLang
&& !lang
.IsEmpty()) {
715 uint32_t len
= lang
.Length();
716 *aLang
= (char *) nsMemory::Clone(lang
.BeginReading(), len
+ 1);
721 if (aCharset
&& !charset
.IsEmpty()) {
722 uint32_t len
= charset
.Length();
723 *aCharset
= (char *) nsMemory::Clone(charset
.BeginReading(), len
+ 1);
725 *(*aCharset
+ len
) = 0;
730 return *aResult
? NS_OK
: NS_ERROR_INVALID_ARG
;
734 internalDecodeRFC2047Header(const char* aHeaderVal
, const char* aDefaultCharset
,
735 bool aOverrideCharset
, bool aEatContinuations
,
740 return NS_ERROR_INVALID_ARG
;
745 // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string but
746 // aDefaultCharset is specified, decodes RFC 2047 encoding and converts
747 // to UTF-8. Otherwise, just strips away CRLF.
748 if (PL_strstr(aHeaderVal
, "=?") ||
749 (aDefaultCharset
&& (!IsUTF8(nsDependentCString(aHeaderVal
)) ||
750 Is7bitNonAsciiString(aHeaderVal
, strlen(aHeaderVal
))))) {
751 DecodeRFC2047Str(aHeaderVal
, aDefaultCharset
, aOverrideCharset
, aResult
);
752 } else if (aEatContinuations
&&
753 (PL_strchr(aHeaderVal
, '\n') || PL_strchr(aHeaderVal
, '\r'))) {
754 aResult
= aHeaderVal
;
756 aEatContinuations
= false;
757 aResult
= aHeaderVal
;
760 if (aEatContinuations
) {
761 nsAutoCString
temp(aResult
);
762 temp
.ReplaceSubstring("\n\t", " ");
763 temp
.ReplaceSubstring("\r\t", " ");
764 temp
.StripChars("\r\n");
772 nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal
,
773 const char* aDefaultCharset
,
774 bool aOverrideCharset
,
775 bool aEatContinuations
,
778 return internalDecodeRFC2047Header(aHeaderVal
, aDefaultCharset
,
779 aOverrideCharset
, aEatContinuations
,
// Returns true if aChar is allowed unescaped in an RFC 5987 value, i.e. it
// matches the "attr-char" production of RFC 5987, Section 3.2.1:
//   ALPHA / DIGIT / "!" / "#" / "$" / "&" / "+" / "-" / "." /
//   "^" / "_" / "`" / "|" / "~"
// Every other character (including '%', '*', "'" and anything with the high
// bit set) yields false.
bool IsRFC5987AttrChar(char aChar)
{
  if ((aChar >= 'a' && aChar <= 'z') ||
      (aChar >= 'A' && aChar <= 'Z') ||
      (aChar >= '0' && aChar <= '9')) {
    return true;
  }

  switch (aChar) {
    case '!': case '#': case '$': case '&':
    case '+': case '-': case '.': case '^':
    case '_': case '`': case '|': case '~':
      return true;
    default:
      return false;
  }
}
797 // percent-decode a value
798 // returns false on failure
799 bool PercentDecode(nsACString
& aValue
)
801 char *c
= (char *) nsMemory::Alloc(aValue
.Length() + 1);
806 strcpy(c
, PromiseFlatCString(aValue
).get());
814 // Decode a parameter value using the encoding defined in RFC 5987
816 // charset "'" [ language ] "'" value-chars
818 nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString
& aParamVal
,
822 nsAutoCString charset
;
823 nsAutoCString language
;
826 uint32_t delimiters
= 0;
827 const char *encoded
= PromiseFlatCString(aParamVal
).get();
828 const char *c
= encoded
;
836 } else if (((unsigned char)tc
) >= 128) {
837 // fail early, not ASCII
838 NS_WARNING("non-US-ASCII character in RFC5987-encoded param");
839 return NS_ERROR_INVALID_ARG
;
841 if (delimiters
== 0) {
842 // valid characters are checked later implicitly
844 } else if (delimiters
== 1) {
845 // no value checking for now
847 } else if (delimiters
== 2) {
848 if (IsRFC5987AttrChar(tc
)) {
850 } else if (tc
== '%') {
851 if (!IsHexDigit(c
[0]) || !IsHexDigit(c
[1])) {
852 // we expect two more characters
853 NS_WARNING("broken %-escape in RFC5987-encoded param");
854 return NS_ERROR_INVALID_ARG
;
857 // we consume two more
861 // character not allowed here
862 NS_WARNING("invalid character in RFC5987-encoded param");
863 return NS_ERROR_INVALID_ARG
;
869 if (delimiters
!= 2) {
870 NS_WARNING("missing delimiters in RFC5987-encoded param");
871 return NS_ERROR_INVALID_ARG
;
874 // abort early for unsupported encodings
875 if (!charset
.LowerCaseEqualsLiteral("utf-8")) {
876 NS_WARNING("unsupported charset in RFC5987-encoded param");
877 return NS_ERROR_INVALID_ARG
;
881 if (!PercentDecode(value
)) {
882 return NS_ERROR_OUT_OF_MEMORY
;
885 // return the encoding
886 aLang
.Assign(language
);
888 // finally convert octet sequence to UTF-8 and be done
890 nsCOMPtr
<nsIUTF8ConverterService
> cvtUTF8
=
891 do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID
, &rv
);
892 NS_ENSURE_SUCCESS(rv
, rv
);
895 rv
= cvtUTF8
->ConvertStringToUTF8(value
, charset
.get(), true, false, 1, utf8
);
896 NS_ENSURE_SUCCESS(rv
, rv
);
898 CopyUTF8toUTF16(utf8
, aResult
);
903 internalDecodeParameter(const nsACString
& aParamValue
, const char* aCharset
,
904 const char* aDefaultCharset
, bool aOverrideCharset
,
905 bool aDecode2047
, nsACString
& aResult
)
908 // If aCharset is given, aParamValue was obtained from RFC2231/5987
909 // encoding and we're pretty sure that it's in aCharset.
910 if (aCharset
&& *aCharset
)
912 nsCOMPtr
<nsIUTF8ConverterService
> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID
));
914 return cvtUTF8
->ConvertStringToUTF8(aParamValue
, aCharset
,
915 true, true, 1, aResult
);
918 const nsAFlatCString
& param
= PromiseFlatCString(aParamValue
);
919 nsAutoCString unQuoted
;
920 nsACString::const_iterator s
, e
;
921 param
.BeginReading(s
);
924 // strip '\' when used to quote CR, LF, '"' and '\'
925 for ( ; s
!= e
; ++s
) {
928 --s
; // '\' is at the end. move back and append '\'.
930 else if (*s
!= nsCRT::CR
&& *s
!= nsCRT::LF
&& *s
!= '"' && *s
!= '\\') {
931 --s
; // '\' is not foll. by CR,LF,'"','\'. move back and append '\'
933 // else : skip '\' and append the quoted character.
942 nsAutoCString decoded
;
944 // Try RFC 2047 encoding, instead.
945 rv
= internalDecodeRFC2047Header(unQuoted
.get(), aDefaultCharset
,
946 aOverrideCharset
, true, decoded
);
948 if (NS_SUCCEEDED(rv
) && !decoded
.IsEmpty())
956 nsMIMEHeaderParamImpl::DecodeParameter(const nsACString
& aParamValue
,
957 const char* aCharset
,
958 const char* aDefaultCharset
,
959 bool aOverrideCharset
,
962 return internalDecodeParameter(aParamValue
, aCharset
, aDefaultCharset
,
963 aOverrideCharset
, true, aResult
);
966 #define ISHEXCHAR(c) \
967 ((0x30 <= uint8_t(c) && uint8_t(c) <= 0x39) || \
968 (0x41 <= uint8_t(c) && uint8_t(c) <= 0x46) || \
969 (0x61 <= uint8_t(c) && uint8_t(c) <= 0x66))
971 // Decode Q encoding (RFC 2047).
973 char *DecodeQ(const char *in
, uint32_t length
)
975 char *out
, *dest
= 0;
977 out
= dest
= (char *)PR_Calloc(length
+ 1, sizeof(char));
984 // check if |in| in the form of '=hh' where h is [0-9a-fA-F].
985 if (length
< 3 || !ISHEXCHAR(in
[1]) || !ISHEXCHAR(in
[2]))
987 PR_sscanf(in
+ 1, "%2X", &c
);
1000 if (*in
& 0x80) goto badsyntax
;
1007 for (out
= dest
; *out
; ++out
) {
1019 // check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842))
1020 // or has ESC which may be an indication that it's in one of many ISO
1021 // 2022 7bit encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554).
1023 bool Is7bitNonAsciiString(const char *input
, uint32_t len
)
1027 enum { hz_initial
, // No HZ seen yet
1028 hz_escaped
, // Inside an HZ ~{ escape sequence
1029 hz_seen
, // Have seen at least one complete HZ sequence
1030 hz_notpresent
// Have seen something that is not legal HZ
1033 hz_state
= hz_initial
;
1035 c
= uint8_t(*input
++);
1037 if (c
& 0x80) return false;
1038 if (c
== 0x1B) return true;
1043 if (*input
== '{') {
1044 hz_state
= hz_escaped
;
1045 } else if (*input
== '~') {
1046 // ~~ is the HZ encoding of ~. Skip over second ~ as well
1051 hz_state
= hz_notpresent
;
1056 if (*input
== '}') hz_state
= hz_seen
;
1063 return hz_state
== hz_seen
;
1066 #define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD)
1068 // copy 'raw' sequences of octets in aInput to aOutput.
1069 // If aDefaultCharset is specified, the input is assumed to be in the
1070 // charset and converted to UTF-8. Otherwise, a blind copy is made.
1071 // If aDefaultCharset is specified, but the conversion to UTF-8
1072 // is not successful, each octet is replaced by Unicode replacement
1073 // chars. *aOutput is advanced by the number of output octets.
1075 void CopyRawHeader(const char *aInput
, uint32_t aLen
,
1076 const char *aDefaultCharset
, nsACString
&aOutput
)
1080 // If aDefaultCharset is not specified, make a blind copy.
1081 if (!aDefaultCharset
|| !*aDefaultCharset
) {
1082 aOutput
.Append(aInput
, aLen
);
1086 // Copy as long as it's US-ASCII. An ESC may indicate ISO 2022
1087 // A ~ may indicate it is HZ
1088 while (aLen
&& (c
= uint8_t(*aInput
++)) != 0x1B && c
!= '~' && !(c
& 0x80)) {
1089 aOutput
.Append(char(c
));
1097 // skip ASCIIness/UTF8ness test if aInput is suspected to be a 7bit non-ascii
1098 // string and aDefaultCharset is a 7bit non-ascii charset.
1099 bool skipCheck
= (c
== 0x1B || c
== '~') &&
1100 IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset
);
1102 // If not UTF-8, treat as default charset
1103 nsCOMPtr
<nsIUTF8ConverterService
>
1104 cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID
));
1105 nsAutoCString utf8Text
;
1108 cvtUTF8
->ConvertStringToUTF8(Substring(aInput
, aInput
+ aLen
),
1109 aDefaultCharset
, skipCheck
, true, 1,
1111 aOutput
.Append(utf8Text
);
1112 } else { // replace each octet with Unicode replacement char in UTF-8.
1113 for (uint32_t i
= 0; i
< aLen
; i
++) {
1114 c
= uint8_t(*aInput
++);
1116 aOutput
.Append(REPLACEMENT_CHAR
);
1118 aOutput
.Append(char(c
));
1123 nsresult
DecodeQOrBase64Str(const char *aEncoded
, size_t aLen
, char aQOrBase64
,
1124 const char *aCharset
, nsACString
&aResult
)
1127 NS_ASSERTION(aQOrBase64
== 'Q' || aQOrBase64
== 'B', "Should be 'Q' or 'B'");
1128 if(aQOrBase64
== 'Q')
1129 decodedText
= DecodeQ(aEncoded
, aLen
);
1130 else if (aQOrBase64
== 'B') {
1131 decodedText
= PL_Base64Decode(aEncoded
, aLen
, nullptr);
1133 return NS_ERROR_INVALID_ARG
;
1137 return NS_ERROR_INVALID_ARG
;
1141 nsCOMPtr
<nsIUTF8ConverterService
>
1142 cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID
, &rv
));
1143 nsAutoCString utf8Text
;
1144 if (NS_SUCCEEDED(rv
)) {
1145 // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
1146 rv
= cvtUTF8
->ConvertStringToUTF8(nsDependentCString(decodedText
),
1148 IS_7BIT_NON_ASCII_CHARSET(aCharset
),
1151 PR_Free(decodedText
);
1152 if (NS_FAILED(rv
)) {
1155 aResult
.Append(utf8Text
);
1160 static const char especials
[] = "()<>@,;:\\\"/[]?.=";
1162 // |decode_mime_part2_str| taken from comi18n.c
1163 // Decode RFC2047-encoded words in the input and convert the result to UTF-8.
1164 // If aOverrideCharset is true, charset in RFC2047-encoded words is
1165 // ignored and aDefaultCharset is assumed, instead. aDefaultCharset
1166 // is also used to convert raw octets (without RFC 2047 encoding) to UTF-8.
1168 nsresult
DecodeRFC2047Str(const char *aHeader
, const char *aDefaultCharset
,
1169 bool aOverrideCharset
, nsACString
&aResult
)
1171 const char *p
, *q
= nullptr, *r
;
1172 const char *begin
; // tracking pointer for where we are in the input buffer
1173 int32_t isLastEncodedWord
= 0;
1174 const char *charsetStart
, *charsetEnd
;
1175 nsAutoCString prevCharset
, curCharset
;
1176 nsAutoCString encodedText
;
1177 char prevEncoding
= '\0', curEncoding
;
1182 // To avoid buffer realloc, if possible, set capacity in advance. No
1183 // matter what, more than 3x expansion can never happen for all charsets
1184 // supported by Mozilla. SCSU/BCSU with the sliding window set to a
1185 // non-BMP block may be exceptions, but Mozilla does not support them.
1186 // Neither any known mail/news program use them. Even if there's, we're
1187 // safe because we don't use a raw *char any more.
1188 aResult
.SetCapacity(3 * strlen(aHeader
));
1190 while ((p
= PL_strstr(begin
, "=?")) != 0) {
1191 if (isLastEncodedWord
) {
1192 // See if it's all whitespace.
1193 for (q
= begin
; q
< p
; ++q
) {
1194 if (!PL_strchr(" \t\r\n", *q
)) break;
1198 if (!isLastEncodedWord
|| q
< p
) {
1199 if (!encodedText
.IsEmpty()) {
1200 rv
= DecodeQOrBase64Str(encodedText
.get(), encodedText
.Length(),
1201 prevEncoding
, prevCharset
.get(), aResult
);
1202 if (NS_FAILED(rv
)) {
1203 aResult
.Append(encodedText
);
1205 encodedText
.Truncate();
1206 prevCharset
.Truncate();
1207 prevEncoding
= '\0';
1209 // copy the part before the encoded-word
1210 CopyRawHeader(begin
, p
- begin
, aDefaultCharset
, aResult
);
1219 for (q
= p
; *q
!= '?'; q
++) {
1220 if (*q
<= ' ' || PL_strchr(especials
, *q
)) {
1224 // RFC 2231 section 5
1225 if (!charsetEnd
&& *q
== '*') {
1234 curEncoding
= nsCRT::ToUpper(*q
);
1235 if (curEncoding
!= 'Q' && curEncoding
!= 'B')
1242 for (r
= q
+ 2; *r
!= '?'; r
++) {
1243 if (*r
< ' ') goto badsyntax
;
1247 else if (r
== q
+ 2) {
1250 isLastEncodedWord
= 1;
1254 curCharset
.Assign(charsetStart
, charsetEnd
- charsetStart
);
1255 // Override charset if requested. Never override labeled UTF-8.
1256 // Use default charset instead of UNKNOWN-8BIT
1257 if ((aOverrideCharset
&& 0 != nsCRT::strcasecmp(curCharset
.get(), "UTF-8"))
1258 || (aDefaultCharset
&& 0 == nsCRT::strcasecmp(curCharset
.get(), "UNKNOWN-8BIT"))
1260 curCharset
= aDefaultCharset
;
1265 if (curEncoding
== 'B') {
1266 // bug 227290. ignore an extraneous '=' at the end.
1267 // (# of characters in B-encoded part has to be a multiple of 4)
1268 int32_t n
= r
- (q
+ 2);
1269 R
-= (n
% 4 == 1 && !PL_strncmp(r
- 3, "===", 3)) ? 1 : 0;
1271 // Bug 493544. Don't decode the encoded text until it ends
1273 && (prevCharset
.IsEmpty()
1274 || (curCharset
== prevCharset
&& curEncoding
== prevEncoding
))
1276 encodedText
.Append(q
+ 2, R
- (q
+ 2));
1277 prevCharset
= curCharset
;
1278 prevEncoding
= curEncoding
;
1281 isLastEncodedWord
= 1;
1285 bool bDecoded
; // If the current line has been decoded.
1287 if (!encodedText
.IsEmpty()) {
1288 if (curCharset
== prevCharset
&& curEncoding
== prevEncoding
) {
1289 encodedText
.Append(q
+ 2, R
- (q
+ 2));
1292 rv
= DecodeQOrBase64Str(encodedText
.get(), encodedText
.Length(),
1293 prevEncoding
, prevCharset
.get(), aResult
);
1294 if (NS_FAILED(rv
)) {
1295 aResult
.Append(encodedText
);
1297 encodedText
.Truncate();
1298 prevCharset
.Truncate();
1299 prevEncoding
= '\0';
1302 rv
= DecodeQOrBase64Str(q
+ 2, R
- (q
+ 2), curEncoding
,
1303 curCharset
.get(), aResult
);
1304 if (NS_FAILED(rv
)) {
1305 aResult
.Append(encodedText
);
1310 isLastEncodedWord
= 1;
1314 if (!encodedText
.IsEmpty()) {
1315 rv
= DecodeQOrBase64Str(encodedText
.get(), encodedText
.Length(),
1316 prevEncoding
, prevCharset
.get(), aResult
);
1317 if (NS_FAILED(rv
)) {
1318 aResult
.Append(encodedText
);
1320 encodedText
.Truncate();
1321 prevCharset
.Truncate();
1323 // copy the part before the encoded-word
1324 aResult
.Append(begin
, p
- begin
);
1326 isLastEncodedWord
= 0;
1329 if (!encodedText
.IsEmpty()) {
1330 rv
= DecodeQOrBase64Str(encodedText
.get(), encodedText
.Length(),
1331 prevEncoding
, prevCharset
.get(), aResult
);
1332 if (NS_FAILED(rv
)) {
1333 aResult
.Append(encodedText
);
1337 // put the tail back
1338 CopyRawHeader(begin
, strlen(begin
), aDefaultCharset
, aResult
);
1340 nsAutoCString
tempStr(aResult
);
1341 tempStr
.ReplaceChar('\t', ' ');