Bumping manifests a=b2g-bump
[gecko.git] / netwerk / mime / nsMIMEHeaderParamImpl.cpp
blob9e1062eafb993da780a1587517ace51fd6b6a87a
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set sw=4 ts=8 et tw=80 : */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include <string.h>
8 #include "prmem.h"
9 #include "prprf.h"
10 #include "plstr.h"
11 #include "plbase64.h"
12 #include "nsCRT.h"
13 #include "nsMemory.h"
14 #include "nsTArray.h"
15 #include "nsCOMPtr.h"
16 #include "nsEscape.h"
17 #include "nsIUTF8ConverterService.h"
18 #include "nsUConvCID.h"
19 #include "nsIServiceManager.h"
20 #include "nsMIMEHeaderParamImpl.h"
21 #include "nsReadableUtils.h"
22 #include "nsNativeCharsetUtils.h"
23 #include "nsError.h"
24 #include "nsIUnicodeDecoder.h"
25 #include "mozilla/dom/EncodingUtils.h"
27 using mozilla::dom::EncodingUtils;
29 // static functions declared below are moved from mailnews/mime/src/comi18n.cpp
31 static char *DecodeQ(const char *, uint32_t);
32 static bool Is7bitNonAsciiString(const char *, uint32_t);
33 static void CopyRawHeader(const char *, uint32_t, const char *, nsACString &);
34 static nsresult DecodeRFC2047Str(const char *, const char *, bool, nsACString&);
35 static nsresult internalDecodeParameter(const nsACString&, const char*,
36 const char*, bool, bool, nsACString&);
38 // XXX The chance of UTF-7 being used in the message header is really
39 // low, but in theory it's possible.
40 #define IS_7BIT_NON_ASCII_CHARSET(cset) \
41 (!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \
42 !nsCRT::strncasecmp((cset), "HZ-GB", 5) || \
43 !nsCRT::strncasecmp((cset), "UTF-7", 5))
45 NS_IMPL_ISUPPORTS(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam)
47 NS_IMETHODIMP
48 nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal,
49 const char *aParamName,
50 const nsACString& aFallbackCharset,
51 bool aTryLocaleCharset,
52 char **aLang, nsAString& aResult)
54 return DoGetParameter(aHeaderVal, aParamName, MIME_FIELD_ENCODING,
55 aFallbackCharset, aTryLocaleCharset, aLang, aResult);
58 NS_IMETHODIMP
59 nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal,
60 const char *aParamName,
61 const nsACString& aFallbackCharset,
62 bool aTryLocaleCharset,
63 char **aLang, nsAString& aResult)
65 return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING,
66 aFallbackCharset, aTryLocaleCharset, aLang, aResult);
69 // XXX : aTryLocaleCharset is not yet effective.
70 nsresult
71 nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal,
72 const char *aParamName,
73 ParamDecoding aDecoding,
74 const nsACString& aFallbackCharset,
75 bool aTryLocaleCharset,
76 char **aLang, nsAString& aResult)
78 aResult.Truncate();
79 nsresult rv;
81 // get parameter (decode RFC 2231/5987 when applicable, as specified by
82 // aDecoding (5987 being a subset of 2231) and return charset.)
83 nsXPIDLCString med;
84 nsXPIDLCString charset;
85 rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName,
86 aDecoding, getter_Copies(charset), aLang,
87 getter_Copies(med));
88 if (NS_FAILED(rv))
89 return rv;
91 // convert to UTF-8 after charset conversion and RFC 2047 decoding
92 // if necessary.
94 nsAutoCString str1;
95 rv = internalDecodeParameter(med, charset.get(), nullptr, false,
96 // was aDecoding == MIME_FIELD_ENCODING
97 // see bug 875615
98 true,
99 str1);
100 NS_ENSURE_SUCCESS(rv, rv);
102 if (!aFallbackCharset.IsEmpty())
104 nsAutoCString charset;
105 EncodingUtils::FindEncodingForLabel(aFallbackCharset, charset);
106 nsAutoCString str2;
107 nsCOMPtr<nsIUTF8ConverterService>
108 cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
109 if (cvtUTF8 &&
110 NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1,
111 PromiseFlatCString(aFallbackCharset).get(), false,
112 !charset.EqualsLiteral("UTF-8"),
113 1, str2))) {
114 CopyUTF8toUTF16(str2, aResult);
115 return NS_OK;
119 if (IsUTF8(str1)) {
120 CopyUTF8toUTF16(str1, aResult);
121 return NS_OK;
124 if (aTryLocaleCharset && !NS_IsNativeUTF8())
125 return NS_CopyNativeToUnicode(str1, aResult);
127 CopyASCIItoUTF16(str1, aResult);
128 return NS_OK;
// Strip the backslash escapes of a quoted-string, in place.
// Every backslash that is followed by another character is dropped and the
// following character is kept literally; a backslash that is the very last
// character is kept. The string can only get shorter.
void RemoveQuotedStringEscapes(char *src)
{
  const char *readPos = src;
  char *writePos = src;

  while (*readPos) {
    if (*readPos == '\\' && readPos[1] != '\0') {
      // drop the backslash itself, keep what it escapes
      ++readPos;
    }
    *writePos++ = *readPos++;
  }

  *writePos = '\0';
}
// true if character is a hex digit ([0-9a-fA-F])
bool IsHexDigit(char aChar)
{
  char c = aChar;

  return (c >= 'a' && c <= 'f') ||
         (c >= 'A' && c <= 'F') ||
         (c >= '0' && c <= '9');
}

// Validate that the first |len| octets of |aValue| are syntactically valid
// with respect to %-escapes: every '%' must be followed by two hex digits,
// and both digits must lie inside the |len| octets. An escape that is cut
// off by the end of the range is rejected without reading past the range.
bool IsValidPercentEscaped(const char *aValue, int32_t len)
{
  for (int32_t i = 0; i < len; i++) {
    if (aValue[i] == '%') {
      // i < len holds here, so len - i cannot overflow; we need the '%'
      // plus two more octets inside the range.
      if (len - i < 3 ||
          !IsHexDigit(aValue[i + 1]) || !IsHexDigit(aValue[i + 2])) {
        return false;
      }
    }
  }
  return true;
}
// Support for continuations (RFC 2231, Section 3)

// only a sane number supported
#define MAX_CONTINUATIONS 999

// One segment of a continued parameter value. |value| is a non-owning
// pointer to |length| octets; the two flags record which post-processing
// the segment needs once it has been copied.
class Continuation {
  public:
    Continuation(const char *aValue, uint32_t aLength,
                 bool aNeedsPercentDecoding, bool aWasQuotedString)
      : value(aValue)
      , length(aLength)
      , needsPercentDecoding(aNeedsPercentDecoding)
      , wasQuotedString(aWasQuotedString)
    {
    }

    // empty constructor needed for nsTArray
    Continuation()
      : value(nullptr)
      , length(0)
      , needsPercentDecoding(false)
      , wasQuotedString(false)
    {
    }

    ~Continuation() {}

    const char *value;
    uint32_t length;
    bool needsPercentDecoding;
    bool wasQuotedString;
};
203 // combine segments into a single string, returning the allocated string
204 // (or nullptr) while emptying the list
205 char *combineContinuations(nsTArray<Continuation>& aArray)
207 // Sanity check
208 if (aArray.Length() == 0)
209 return nullptr;
211 // Get an upper bound for the length
212 uint32_t length = 0;
213 for (uint32_t i = 0; i < aArray.Length(); i++) {
214 length += aArray[i].length;
217 // Allocate
218 char *result = (char *) nsMemory::Alloc(length + 1);
220 // Concatenate
221 if (result) {
222 *result = '\0';
224 for (uint32_t i = 0; i < aArray.Length(); i++) {
225 Continuation cont = aArray[i];
226 if (! cont.value) break;
228 char *c = result + strlen(result);
229 strncat(result, cont.value, cont.length);
230 if (cont.needsPercentDecoding) {
231 nsUnescape(c);
233 if (cont.wasQuotedString) {
234 RemoveQuotedStringEscapes(c);
238 // return null if empty value
239 if (*result == '\0') {
240 nsMemory::Free(result);
241 result = nullptr;
243 } else {
244 // Handle OOM
245 NS_WARNING("Out of memory\n");
248 return result;
251 // add a continuation, return false on error if segment already has been seen
252 bool addContinuation(nsTArray<Continuation>& aArray, uint32_t aIndex,
253 const char *aValue, uint32_t aLength,
254 bool aNeedsPercentDecoding, bool aWasQuotedString)
256 if (aIndex < aArray.Length() && aArray[aIndex].value) {
257 NS_WARNING("duplicate RC2231 continuation segment #\n");
258 return false;
261 if (aIndex > MAX_CONTINUATIONS) {
262 NS_WARNING("RC2231 continuation segment # exceeds limit\n");
263 return false;
266 if (aNeedsPercentDecoding && aWasQuotedString) {
267 NS_WARNING("RC2231 continuation segment can't use percent encoding and quoted string form at the same time\n");
268 return false;
271 Continuation cont(aValue, aLength, aNeedsPercentDecoding, aWasQuotedString);
273 if (aArray.Length() <= aIndex) {
274 aArray.SetLength(aIndex + 1);
276 aArray[aIndex] = cont;
278 return true;
281 // parse a segment number; return -1 on error
282 int32_t parseSegmentNumber(const char *aValue, int32_t aLen)
284 if (aLen < 1) {
285 NS_WARNING("segment number missing\n");
286 return -1;
289 if (aLen > 1 && aValue[0] == '0') {
290 NS_WARNING("leading '0' not allowed in segment number\n");
291 return -1;
294 int32_t segmentNumber = 0;
296 for (int32_t i = 0; i < aLen; i++) {
297 if (! (aValue[i] >= '0' && aValue[i] <= '9')) {
298 NS_WARNING("invalid characters in segment number\n");
299 return -1;
302 segmentNumber *= 10;
303 segmentNumber += aValue[i] - '0';
304 if (segmentNumber > MAX_CONTINUATIONS) {
305 NS_WARNING("Segment number exceeds sane size\n");
306 return -1;
310 return segmentNumber;
313 // validate a given octet sequence for compliance with the specified
314 // encoding
315 bool IsValidOctetSequenceForCharset(nsACString& aCharset, const char *aOctets)
317 nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService
318 (NS_UTF8CONVERTERSERVICE_CONTRACTID));
319 if (!cvtUTF8) {
320 NS_WARNING("Can't get UTF8ConverterService\n");
321 return false;
324 nsAutoCString tmpRaw;
325 tmpRaw.Assign(aOctets);
326 nsAutoCString tmpDecoded;
328 nsresult rv = cvtUTF8->ConvertStringToUTF8(tmpRaw,
329 PromiseFlatCString(aCharset).get(),
330 false, false, 1, tmpDecoded);
332 if (rv != NS_OK) {
333 // we can't decode; charset may be unsupported, or the octet sequence
334 // is broken (illegal or incomplete octet sequence contained)
335 NS_WARNING("RFC2231/5987 parameter value does not decode according to specified charset\n");
336 return false;
339 return true;
342 // moved almost verbatim from mimehdrs.cpp
343 // char *
344 // MimeHeaders_get_parameter (const char *header_value, const char *parm_name,
345 // char **charset, char **language)
347 // The format of these header lines is
348 // <token> [ ';' <token> '=' <token-or-quoted-string> ]*
349 NS_IMETHODIMP
350 nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
351 const char *aParamName,
352 char **aCharset,
353 char **aLang,
354 char **aResult)
356 return DoParameterInternal(aHeaderValue, aParamName, MIME_FIELD_ENCODING,
357 aCharset, aLang, aResult);
361 nsresult
362 nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue,
363 const char *aParamName,
364 ParamDecoding aDecoding,
365 char **aCharset,
366 char **aLang,
367 char **aResult)
370 if (!aHeaderValue || !*aHeaderValue || !aResult)
371 return NS_ERROR_INVALID_ARG;
373 *aResult = nullptr;
375 if (aCharset) *aCharset = nullptr;
376 if (aLang) *aLang = nullptr;
378 nsAutoCString charset;
380 // change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable
381 // them for HTTP header fields later on, see bug 776324
382 bool acceptContinuations = true;
384 const char *str = aHeaderValue;
386 // skip leading white space.
387 for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
389 const char *start = str;
391 // aParamName is empty. return the first (possibly) _unnamed_ 'parameter'
392 // For instance, return 'inline' in the following case:
393 // Content-Disposition: inline; filename=.....
394 if (!aParamName || !*aParamName)
396 for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str)
398 if (str == start)
399 return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY;
401 *aResult = (char *) nsMemory::Clone(start, (str - start) + 1);
402 NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY);
403 (*aResult)[str - start] = '\0'; // null-terminate
404 return NS_OK;
407 /* Skip forward to first ';' */
408 for (; *str && *str != ';' && *str != ','; ++str)
410 if (*str)
411 str++;
412 /* Skip over following whitespace */
413 for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
416 // Some broken http servers just specify parameters
417 // like 'filename' without specifying disposition
418 // method. Rewind to the first non-white-space
419 // character.
421 if (!*str)
422 str = start;
424 // RFC2231 - The legitimate parm format can be:
425 // A. title=ThisIsTitle
426 // B. title*=us-ascii'en-us'This%20is%20wierd.
427 // C. title*0*=us-ascii'en'This%20is%20wierd.%20We
428 // title*1*=have%20to%20support%20this.
429 // title*2="Else..."
430 // D. title*0="Hey, what you think you are doing?"
431 // title*1="There is no charset and lang info."
432 // RFC5987: only A and B
434 // collect results for the different algorithms (plain filename,
435 // RFC5987/2231-encoded filename, + continuations) separately and decide
436 // which to use at the end
437 char *caseAResult = nullptr;
438 char *caseBResult = nullptr;
439 char *caseCDResult = nullptr;
441 // collect continuation segments
442 nsTArray<Continuation> segments;
445 // our copies of the charset parameter, kept separately as they might
446 // differ for the two formats
447 nsDependentCSubstring charsetB, charsetCD;
449 nsDependentCSubstring lang;
451 int32_t paramLen = strlen(aParamName);
453 while (*str) {
454 // find name/value
456 const char *nameStart = str;
457 const char *nameEnd = nullptr;
458 const char *valueStart = str;
459 const char *valueEnd = nullptr;
460 bool isQuotedString = false;
462 NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace.");
464 // Skip forward to the end of this token.
465 for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';'; str++)
467 nameEnd = str;
469 int32_t nameLen = nameEnd - nameStart;
471 // Skip over whitespace, '=', and whitespace
472 while (nsCRT::IsAsciiSpace(*str)) ++str;
473 if (!*str) {
474 break;
476 if (*str++ != '=') {
477 // don't accept parameters without "="
478 goto increment_str;
480 while (nsCRT::IsAsciiSpace(*str)) ++str;
482 if (*str != '"') {
483 // The value is a token, not a quoted string.
484 valueStart = str;
485 for (valueEnd = str;
486 *valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';';
487 valueEnd++)
489 str = valueEnd;
490 } else {
491 isQuotedString = true;
493 ++str;
494 valueStart = str;
495 for (valueEnd = str; *valueEnd; ++valueEnd) {
496 if (*valueEnd == '\\' && *(valueEnd + 1))
497 ++valueEnd;
498 else if (*valueEnd == '"')
499 break;
501 str = valueEnd;
502 // *valueEnd != null means that *valueEnd is quote character.
503 if (*valueEnd)
504 str++;
507 // See if this is the simplest case (case A above),
508 // a 'single' line value with no charset and lang.
509 // If so, copy it and return.
510 if (nameLen == paramLen &&
511 !nsCRT::strncasecmp(nameStart, aParamName, paramLen)) {
513 if (caseAResult) {
514 // we already have one caseA result, ignore subsequent ones
515 goto increment_str;
518 // if the parameter spans across multiple lines we have to strip out the
519 // line continuation -- jht 4/29/98
520 nsAutoCString tempStr(valueStart, valueEnd - valueStart);
521 tempStr.StripChars("\r\n");
522 char *res = ToNewCString(tempStr);
523 NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY);
525 if (isQuotedString)
526 RemoveQuotedStringEscapes(res);
528 caseAResult = res;
529 // keep going, we may find a RFC 2231/5987 encoded alternative
531 // case B, C, and D
532 else if (nameLen > paramLen &&
533 !nsCRT::strncasecmp(nameStart, aParamName, paramLen) &&
534 *(nameStart + paramLen) == '*') {
536 // 1st char past '*'
537 const char *cp = nameStart + paramLen + 1;
539 // if param name ends in "*" we need do to RFC5987 "ext-value" decoding
540 bool needExtDecoding = *(nameEnd - 1) == '*';
542 bool caseB = nameLen == paramLen + 1;
543 bool caseCStart = (*cp == '0') && needExtDecoding;
545 // parse the segment number
546 int32_t segmentNumber = -1;
547 if (!caseB) {
548 int32_t segLen = (nameEnd - cp) - (needExtDecoding ? 1 : 0);
549 segmentNumber = parseSegmentNumber(cp, segLen);
551 if (segmentNumber == -1) {
552 acceptContinuations = false;
553 goto increment_str;
557 // CaseB and start of CaseC: requires charset and optional language
558 // in quotes (quotes required even if lang is blank)
559 if (caseB || (caseCStart && acceptContinuations)) {
560 // look for single quotation mark(')
561 const char *sQuote1 = PL_strchr(valueStart, 0x27);
562 const char *sQuote2 = sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nullptr;
564 // Two single quotation marks must be present even in
565 // absence of charset and lang.
566 if (!sQuote1 || !sQuote2) {
567 NS_WARNING("Mandatory two single quotes are missing in header parameter\n");
570 const char *charsetStart = nullptr;
571 int32_t charsetLength = 0;
572 const char *langStart = nullptr;
573 int32_t langLength = 0;
574 const char *rawValStart = nullptr;
575 int32_t rawValLength = 0;
577 if (sQuote2 && sQuote1) {
578 // both delimiters present: charSet'lang'rawVal
579 rawValStart = sQuote2 + 1;
580 rawValLength = valueEnd - rawValStart;
582 langStart = sQuote1 + 1;
583 langLength = sQuote2 - langStart;
585 charsetStart = valueStart;
586 charsetLength = sQuote1 - charsetStart;
588 else if (sQuote1) {
589 // one delimiter; assume charset'rawVal
590 rawValStart = sQuote1 + 1;
591 rawValLength = valueEnd - rawValStart;
593 charsetStart = valueStart;
594 charsetLength = sQuote1 - valueStart;
596 else {
597 // no delimiter: just rawVal
598 rawValStart = valueStart;
599 rawValLength = valueEnd - valueStart;
602 if (langLength != 0) {
603 lang.Assign(langStart, langLength);
606 // keep the charset for later
607 if (caseB) {
608 charsetB.Assign(charsetStart, charsetLength);
609 } else {
610 // if caseCorD
611 charsetCD.Assign(charsetStart, charsetLength);
614 // non-empty value part
615 if (rawValLength > 0) {
616 if (!caseBResult && caseB) {
617 if (!IsValidPercentEscaped(rawValStart, rawValLength)) {
618 goto increment_str;
621 // allocate buffer for the raw value
622 char *tmpResult = (char *) nsMemory::Clone(rawValStart, rawValLength + 1);
623 if (!tmpResult) {
624 goto increment_str;
626 *(tmpResult + rawValLength) = 0;
628 nsUnescape(tmpResult);
629 caseBResult = tmpResult;
630 } else {
631 // caseC
632 bool added = addContinuation(segments, 0, rawValStart,
633 rawValLength, needExtDecoding,
634 isQuotedString);
636 if (!added) {
637 // continuation not added, stop processing them
638 acceptContinuations = false;
642 } // end of if-block : title*0*= or title*=
643 // caseD: a line of multiline param with no need for unescaping : title*[0-9]=
644 // or 2nd or later lines of a caseC param : title*[1-9]*=
645 else if (acceptContinuations && segmentNumber != -1) {
646 uint32_t valueLength = valueEnd - valueStart;
648 bool added = addContinuation(segments, segmentNumber, valueStart,
649 valueLength, needExtDecoding,
650 isQuotedString);
652 if (!added) {
653 // continuation not added, stop processing them
654 acceptContinuations = false;
656 } // end of if-block : title*[0-9]= or title*[1-9]*=
659 // str now points after the end of the value.
660 // skip over whitespace, ';', whitespace.
661 increment_str:
662 while (nsCRT::IsAsciiSpace(*str)) ++str;
663 if (*str == ';') {
664 ++str;
665 } else {
666 // stop processing the header field; either we are done or the
667 // separator was missing
668 break;
670 while (nsCRT::IsAsciiSpace(*str)) ++str;
673 caseCDResult = combineContinuations(segments);
675 if (caseBResult && !charsetB.IsEmpty()) {
676 // check that the 2231/5987 result decodes properly given the
677 // specified character set
678 if (!IsValidOctetSequenceForCharset(charsetB, caseBResult))
679 caseBResult = nullptr;
682 if (caseCDResult && !charsetCD.IsEmpty()) {
683 // check that the 2231/5987 result decodes properly given the
684 // specified character set
685 if (!IsValidOctetSequenceForCharset(charsetCD, caseCDResult))
686 caseCDResult = nullptr;
689 if (caseBResult) {
690 // prefer simple 5987 format over 2231 with continuations
691 *aResult = caseBResult;
692 caseBResult = nullptr;
693 charset.Assign(charsetB);
695 else if (caseCDResult) {
696 // prefer 2231/5987 with or without continuations over plain format
697 *aResult = caseCDResult;
698 caseCDResult = nullptr;
699 charset.Assign(charsetCD);
701 else if (caseAResult) {
702 *aResult = caseAResult;
703 caseAResult = nullptr;
706 // free unused stuff
707 nsMemory::Free(caseAResult);
708 nsMemory::Free(caseBResult);
709 nsMemory::Free(caseCDResult);
711 // if we have a result
712 if (*aResult) {
713 // then return charset and lang as well
714 if (aLang && !lang.IsEmpty()) {
715 uint32_t len = lang.Length();
716 *aLang = (char *) nsMemory::Clone(lang.BeginReading(), len + 1);
717 if (*aLang) {
718 *(*aLang + len) = 0;
721 if (aCharset && !charset.IsEmpty()) {
722 uint32_t len = charset.Length();
723 *aCharset = (char *) nsMemory::Clone(charset.BeginReading(), len + 1);
724 if (*aCharset) {
725 *(*aCharset + len) = 0;
730 return *aResult ? NS_OK : NS_ERROR_INVALID_ARG;
733 nsresult
734 internalDecodeRFC2047Header(const char* aHeaderVal, const char* aDefaultCharset,
735 bool aOverrideCharset, bool aEatContinuations,
736 nsACString& aResult)
738 aResult.Truncate();
739 if (!aHeaderVal)
740 return NS_ERROR_INVALID_ARG;
741 if (!*aHeaderVal)
742 return NS_OK;
745 // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string but
746 // aDefaultCharset is specified, decodes RFC 2047 encoding and converts
747 // to UTF-8. Otherwise, just strips away CRLF.
748 if (PL_strstr(aHeaderVal, "=?") ||
749 (aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) ||
750 Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal))))) {
751 DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
752 } else if (aEatContinuations &&
753 (PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) {
754 aResult = aHeaderVal;
755 } else {
756 aEatContinuations = false;
757 aResult = aHeaderVal;
760 if (aEatContinuations) {
761 nsAutoCString temp(aResult);
762 temp.ReplaceSubstring("\n\t", " ");
763 temp.ReplaceSubstring("\r\t", " ");
764 temp.StripChars("\r\n");
765 aResult = temp;
768 return NS_OK;
771 NS_IMETHODIMP
772 nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal,
773 const char* aDefaultCharset,
774 bool aOverrideCharset,
775 bool aEatContinuations,
776 nsACString& aResult)
778 return internalDecodeRFC2047Header(aHeaderVal, aDefaultCharset,
779 aOverrideCharset, aEatContinuations,
780 aResult);
// true if the character is allowed in a RFC 5987 value
// see RFC 5987, Section 3.2.1, "attr-char":
// ASCII letters, digits, and ! # $ & + - . ^ _ ` | ~
bool IsRFC5987AttrChar(char aChar)
{
  if ((aChar >= 'a' && aChar <= 'z') ||
      (aChar >= 'A' && aChar <= 'Z') ||
      (aChar >= '0' && aChar <= '9')) {
    return true;
  }

  switch (aChar) {
    case '!': case '#': case '$': case '&':
    case '+': case '-': case '.': case '^':
    case '_': case '`': case '|': case '~':
      return true;
    default:
      return false;
  }
}
797 // percent-decode a value
798 // returns false on failure
799 bool PercentDecode(nsACString& aValue)
801 char *c = (char *) nsMemory::Alloc(aValue.Length() + 1);
802 if (!c) {
803 return false;
806 strcpy(c, PromiseFlatCString(aValue).get());
807 nsUnescape(c);
808 aValue.Assign(c);
809 nsMemory::Free(c);
811 return true;
814 // Decode a parameter value using the encoding defined in RFC 5987
816 // charset "'" [ language ] "'" value-chars
817 NS_IMETHODIMP
818 nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString& aParamVal,
819 nsACString& aLang,
820 nsAString& aResult)
822 nsAutoCString charset;
823 nsAutoCString language;
824 nsAutoCString value;
826 uint32_t delimiters = 0;
827 const char *encoded = PromiseFlatCString(aParamVal).get();
828 const char *c = encoded;
830 while (*c) {
831 char tc = *c++;
833 if (tc == '\'') {
834 // single quote
835 delimiters++;
836 } else if (((unsigned char)tc) >= 128) {
837 // fail early, not ASCII
838 NS_WARNING("non-US-ASCII character in RFC5987-encoded param");
839 return NS_ERROR_INVALID_ARG;
840 } else {
841 if (delimiters == 0) {
842 // valid characters are checked later implicitly
843 charset.Append(tc);
844 } else if (delimiters == 1) {
845 // no value checking for now
846 language.Append(tc);
847 } else if (delimiters == 2) {
848 if (IsRFC5987AttrChar(tc)) {
849 value.Append(tc);
850 } else if (tc == '%') {
851 if (!IsHexDigit(c[0]) || !IsHexDigit(c[1])) {
852 // we expect two more characters
853 NS_WARNING("broken %-escape in RFC5987-encoded param");
854 return NS_ERROR_INVALID_ARG;
856 value.Append(tc);
857 // we consume two more
858 value.Append(*c++);
859 value.Append(*c++);
860 } else {
861 // character not allowed here
862 NS_WARNING("invalid character in RFC5987-encoded param");
863 return NS_ERROR_INVALID_ARG;
869 if (delimiters != 2) {
870 NS_WARNING("missing delimiters in RFC5987-encoded param");
871 return NS_ERROR_INVALID_ARG;
874 // abort early for unsupported encodings
875 if (!charset.LowerCaseEqualsLiteral("utf-8")) {
876 NS_WARNING("unsupported charset in RFC5987-encoded param");
877 return NS_ERROR_INVALID_ARG;
880 // percent-decode
881 if (!PercentDecode(value)) {
882 return NS_ERROR_OUT_OF_MEMORY;
885 // return the encoding
886 aLang.Assign(language);
888 // finally convert octet sequence to UTF-8 and be done
889 nsresult rv = NS_OK;
890 nsCOMPtr<nsIUTF8ConverterService> cvtUTF8 =
891 do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv);
892 NS_ENSURE_SUCCESS(rv, rv);
894 nsAutoCString utf8;
895 rv = cvtUTF8->ConvertStringToUTF8(value, charset.get(), true, false, 1, utf8);
896 NS_ENSURE_SUCCESS(rv, rv);
898 CopyUTF8toUTF16(utf8, aResult);
899 return NS_OK;
902 nsresult
903 internalDecodeParameter(const nsACString& aParamValue, const char* aCharset,
904 const char* aDefaultCharset, bool aOverrideCharset,
905 bool aDecode2047, nsACString& aResult)
907 aResult.Truncate();
908 // If aCharset is given, aParamValue was obtained from RFC2231/5987
909 // encoding and we're pretty sure that it's in aCharset.
910 if (aCharset && *aCharset)
912 nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
913 if (cvtUTF8)
914 return cvtUTF8->ConvertStringToUTF8(aParamValue, aCharset,
915 true, true, 1, aResult);
918 const nsAFlatCString& param = PromiseFlatCString(aParamValue);
919 nsAutoCString unQuoted;
920 nsACString::const_iterator s, e;
921 param.BeginReading(s);
922 param.EndReading(e);
924 // strip '\' when used to quote CR, LF, '"' and '\'
925 for ( ; s != e; ++s) {
926 if ((*s == '\\')) {
927 if (++s == e) {
928 --s; // '\' is at the end. move back and append '\'.
930 else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' && *s != '\\') {
931 --s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\'
933 // else : skip '\' and append the quoted character.
935 unQuoted.Append(*s);
938 aResult = unQuoted;
939 nsresult rv = NS_OK;
941 if (aDecode2047) {
942 nsAutoCString decoded;
944 // Try RFC 2047 encoding, instead.
945 rv = internalDecodeRFC2047Header(unQuoted.get(), aDefaultCharset,
946 aOverrideCharset, true, decoded);
948 if (NS_SUCCEEDED(rv) && !decoded.IsEmpty())
949 aResult = decoded;
952 return rv;
955 NS_IMETHODIMP
956 nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
957 const char* aCharset,
958 const char* aDefaultCharset,
959 bool aOverrideCharset,
960 nsACString& aResult)
962 return internalDecodeParameter(aParamValue, aCharset, aDefaultCharset,
963 aOverrideCharset, true, aResult);
966 #define ISHEXCHAR(c) \
967 ((0x30 <= uint8_t(c) && uint8_t(c) <= 0x39) || \
968 (0x41 <= uint8_t(c) && uint8_t(c) <= 0x46) || \
969 (0x61 <= uint8_t(c) && uint8_t(c) <= 0x66))
971 // Decode Q encoding (RFC 2047).
972 // static
973 char *DecodeQ(const char *in, uint32_t length)
975 char *out, *dest = 0;
977 out = dest = (char *)PR_Calloc(length + 1, sizeof(char));
978 if (dest == nullptr)
979 return nullptr;
980 while (length > 0) {
981 unsigned c = 0;
982 switch (*in) {
983 case '=':
984 // check if |in| in the form of '=hh' where h is [0-9a-fA-F].
985 if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2]))
986 goto badsyntax;
987 PR_sscanf(in + 1, "%2X", &c);
988 *out++ = (char) c;
989 in += 3;
990 length -= 3;
991 break;
993 case '_':
994 *out++ = ' ';
995 in++;
996 length--;
997 break;
999 default:
1000 if (*in & 0x80) goto badsyntax;
1001 *out++ = *in++;
1002 length--;
1005 *out++ = '\0';
1007 for (out = dest; *out ; ++out) {
1008 if (*out == '\t')
1009 *out = ' ';
1012 return dest;
1014 badsyntax:
1015 PR_Free(dest);
1016 return nullptr;
// check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842))
// or has ESC which may be an indication that it's in one of many ISO
// 2022 7bit encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554).
//
// Examines exactly |len| octets of |input| and never reads beyond them.
// Returns false as soon as an octet with the high bit set is found, true as
// soon as an ESC (0x1B) is found, and otherwise true only if the scan ended
// with at least one complete HZ sequence ("~{ ... ~}" or "~~") seen and
// nothing afterwards that is illegal in HZ.
// static
bool Is7bitNonAsciiString(const char *input, uint32_t len)
{
  int32_t c;

  enum { hz_initial, // No HZ seen yet
         hz_escaped, // Inside an HZ ~{ escape sequence
         hz_seen, // Have seen at least one complete HZ sequence
         hz_notpresent // Have seen something that is not legal HZ
  } hz_state;

  hz_state = hz_initial;
  while (len) {
    c = uint8_t(*input++);
    len--;
    if (c & 0x80) return false;
    if (c == 0x1B) return true;
    if (c == '~') {
      // Look at the octet after '~' only when it is still inside the
      // buffer. A '~' that is the last octet is treated as if it were
      // followed by '\0', which is what a null-terminated caller would have
      // observed anyway; this also keeps |len| from wrapping around below.
      const char next = len ? *input : '\0';

      switch (hz_state) {
      case hz_initial:
      case hz_seen:
        if (next == '{') {
          hz_state = hz_escaped;
        } else if (next == '~') {
          // ~~ is the HZ encoding of ~.  Skip over second ~ as well
          hz_state = hz_seen;
          input++;
          len--;
        } else {
          hz_state = hz_notpresent;
        }
        break;

      case hz_escaped:
        if (next == '}') hz_state = hz_seen;
        break;
      default:
        break;
      }
    }
  }
  return hz_state == hz_seen;
}
1066 #define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD)
1068 // copy 'raw' sequences of octets in aInput to aOutput.
1069 // If aDefaultCharset is specified, the input is assumed to be in the
1070 // charset and converted to UTF-8. Otherwise, a blind copy is made.
1071 // If aDefaultCharset is specified, but the conversion to UTF-8
1072 // is not successful, each octet is replaced by Unicode replacement
1073 // chars. *aOutput is advanced by the number of output octets.
1074 // static
1075 void CopyRawHeader(const char *aInput, uint32_t aLen,
1076 const char *aDefaultCharset, nsACString &aOutput)
1078 int32_t c;
1080 // If aDefaultCharset is not specified, make a blind copy.
1081 if (!aDefaultCharset || !*aDefaultCharset) {
1082 aOutput.Append(aInput, aLen);
1083 return;
1086 // Copy as long as it's US-ASCII. An ESC may indicate ISO 2022
1087 // A ~ may indicate it is HZ
1088 while (aLen && (c = uint8_t(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) {
1089 aOutput.Append(char(c));
1090 aLen--;
1092 if (!aLen) {
1093 return;
1095 aInput--;
1097 // skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii
1098 // string and aDefaultCharset is a 7bit non-ascii charset.
1099 bool skipCheck = (c == 0x1B || c == '~') &&
1100 IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset);
1102 // If not UTF-8, treat as default charset
1103 nsCOMPtr<nsIUTF8ConverterService>
1104 cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
1105 nsAutoCString utf8Text;
1106 if (cvtUTF8 &&
1107 NS_SUCCEEDED(
1108 cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen),
1109 aDefaultCharset, skipCheck, true, 1,
1110 utf8Text))) {
1111 aOutput.Append(utf8Text);
1112 } else { // replace each octet with Unicode replacement char in UTF-8.
1113 for (uint32_t i = 0; i < aLen; i++) {
1114 c = uint8_t(*aInput++);
1115 if (c & 0x80)
1116 aOutput.Append(REPLACEMENT_CHAR);
1117 else
1118 aOutput.Append(char(c));
1123 nsresult DecodeQOrBase64Str(const char *aEncoded, size_t aLen, char aQOrBase64,
1124 const char *aCharset, nsACString &aResult)
1126 char *decodedText;
1127 NS_ASSERTION(aQOrBase64 == 'Q' || aQOrBase64 == 'B', "Should be 'Q' or 'B'");
1128 if(aQOrBase64 == 'Q')
1129 decodedText = DecodeQ(aEncoded, aLen);
1130 else if (aQOrBase64 == 'B') {
1131 decodedText = PL_Base64Decode(aEncoded, aLen, nullptr);
1132 } else {
1133 return NS_ERROR_INVALID_ARG;
1136 if (!decodedText) {
1137 return NS_ERROR_INVALID_ARG;
1140 nsresult rv;
1141 nsCOMPtr<nsIUTF8ConverterService>
1142 cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv));
1143 nsAutoCString utf8Text;
1144 if (NS_SUCCEEDED(rv)) {
1145 // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
1146 rv = cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText),
1147 aCharset,
1148 IS_7BIT_NON_ASCII_CHARSET(aCharset),
1149 true, 1, utf8Text);
1151 PR_Free(decodedText);
1152 if (NS_FAILED(rv)) {
1153 return rv;
1155 aResult.Append(utf8Text);
1157 return NS_OK;
1160 static const char especials[] = "()<>@,;:\\\"/[]?.=";
1162 // |decode_mime_part2_str| taken from comi18n.c
1163 // Decode RFC2047-encoded words in the input and convert the result to UTF-8.
1164 // If aOverrideCharset is true, charset in RFC2047-encoded words is
1165 // ignored and aDefaultCharset is assumed, instead. aDefaultCharset
1166 // is also used to convert raw octets (without RFC 2047 encoding) to UTF-8.
1167 //static
1168 nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset,
1169 bool aOverrideCharset, nsACString &aResult)
1171 const char *p, *q = nullptr, *r;
1172 const char *begin; // tracking pointer for where we are in the input buffer
1173 int32_t isLastEncodedWord = 0;
1174 const char *charsetStart, *charsetEnd;
1175 nsAutoCString prevCharset, curCharset;
1176 nsAutoCString encodedText;
1177 char prevEncoding = '\0', curEncoding;
1178 nsresult rv;
1180 begin = aHeader;
1182 // To avoid buffer realloc, if possible, set capacity in advance. No
1183 // matter what, more than 3x expansion can never happen for all charsets
1184 // supported by Mozilla. SCSU/BCSU with the sliding window set to a
1185 // non-BMP block may be exceptions, but Mozilla does not support them.
1186 // Neither any known mail/news program use them. Even if there's, we're
1187 // safe because we don't use a raw *char any more.
1188 aResult.SetCapacity(3 * strlen(aHeader));
1190 while ((p = PL_strstr(begin, "=?")) != 0) {
1191 if (isLastEncodedWord) {
1192 // See if it's all whitespace.
1193 for (q = begin; q < p; ++q) {
1194 if (!PL_strchr(" \t\r\n", *q)) break;
1198 if (!isLastEncodedWord || q < p) {
1199 if (!encodedText.IsEmpty()) {
1200 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
1201 prevEncoding, prevCharset.get(), aResult);
1202 if (NS_FAILED(rv)) {
1203 aResult.Append(encodedText);
1205 encodedText.Truncate();
1206 prevCharset.Truncate();
1207 prevEncoding = '\0';
1209 // copy the part before the encoded-word
1210 CopyRawHeader(begin, p - begin, aDefaultCharset, aResult);
1211 begin = p;
1214 p += 2;
1216 // Get charset info
1217 charsetStart = p;
1218 charsetEnd = 0;
1219 for (q = p; *q != '?'; q++) {
1220 if (*q <= ' ' || PL_strchr(especials, *q)) {
1221 goto badsyntax;
1224 // RFC 2231 section 5
1225 if (!charsetEnd && *q == '*') {
1226 charsetEnd = q;
1229 if (!charsetEnd) {
1230 charsetEnd = q;
1233 q++;
1234 curEncoding = nsCRT::ToUpper(*q);
1235 if (curEncoding != 'Q' && curEncoding != 'B')
1236 goto badsyntax;
1238 if (q[1] != '?')
1239 goto badsyntax;
1241 r = q;
1242 for (r = q + 2; *r != '?'; r++) {
1243 if (*r < ' ') goto badsyntax;
1245 if (r[1] != '=')
1246 goto badsyntax;
1247 else if (r == q + 2) {
1248 // it's empty, skip
1249 begin = r + 2;
1250 isLastEncodedWord = 1;
1251 continue;
1254 curCharset.Assign(charsetStart, charsetEnd - charsetStart);
1255 // Override charset if requested. Never override labeled UTF-8.
1256 // Use default charset instead of UNKNOWN-8BIT
1257 if ((aOverrideCharset && 0 != nsCRT::strcasecmp(curCharset.get(), "UTF-8"))
1258 || (aDefaultCharset && 0 == nsCRT::strcasecmp(curCharset.get(), "UNKNOWN-8BIT"))
1260 curCharset = aDefaultCharset;
1263 const char *R;
1264 R = r;
1265 if (curEncoding == 'B') {
1266 // bug 227290. ignore an extraneous '=' at the end.
1267 // (# of characters in B-encoded part has to be a multiple of 4)
1268 int32_t n = r - (q + 2);
1269 R -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0;
1271 // Bug 493544. Don't decode the encoded text until it ends
1272 if (R[-1] != '='
1273 && (prevCharset.IsEmpty()
1274 || (curCharset == prevCharset && curEncoding == prevEncoding))
1276 encodedText.Append(q + 2, R - (q + 2));
1277 prevCharset = curCharset;
1278 prevEncoding = curEncoding;
1280 begin = r + 2;
1281 isLastEncodedWord = 1;
1282 continue;
1285 bool bDecoded; // If the current line has been decoded.
1286 bDecoded = false;
1287 if (!encodedText.IsEmpty()) {
1288 if (curCharset == prevCharset && curEncoding == prevEncoding) {
1289 encodedText.Append(q + 2, R - (q + 2));
1290 bDecoded = true;
1292 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
1293 prevEncoding, prevCharset.get(), aResult);
1294 if (NS_FAILED(rv)) {
1295 aResult.Append(encodedText);
1297 encodedText.Truncate();
1298 prevCharset.Truncate();
1299 prevEncoding = '\0';
1301 if (!bDecoded) {
1302 rv = DecodeQOrBase64Str(q + 2, R - (q + 2), curEncoding,
1303 curCharset.get(), aResult);
1304 if (NS_FAILED(rv)) {
1305 aResult.Append(encodedText);
1309 begin = r + 2;
1310 isLastEncodedWord = 1;
1311 continue;
1313 badsyntax:
1314 if (!encodedText.IsEmpty()) {
1315 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
1316 prevEncoding, prevCharset.get(), aResult);
1317 if (NS_FAILED(rv)) {
1318 aResult.Append(encodedText);
1320 encodedText.Truncate();
1321 prevCharset.Truncate();
1323 // copy the part before the encoded-word
1324 aResult.Append(begin, p - begin);
1325 begin = p;
1326 isLastEncodedWord = 0;
1329 if (!encodedText.IsEmpty()) {
1330 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
1331 prevEncoding, prevCharset.get(), aResult);
1332 if (NS_FAILED(rv)) {
1333 aResult.Append(encodedText);
1337 // put the tail back
1338 CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult);
1340 nsAutoCString tempStr(aResult);
1341 tempStr.ReplaceChar('\t', ' ');
1342 aResult = tempStr;
1344 return NS_OK;