// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// A WordEncoder is an RFC 2047 encoded-word encoder.
//
// The underlying byte is the encoding letter that is written between the
// charset and the encoded-text of each encoded-word.
type WordEncoder byte

const (
	// BEncoding represents Base64 encoding scheme as defined by RFC 2045.
	BEncoding = WordEncoder('b')
	// QEncoding represents the Q-encoding scheme as defined by RFC 2047.
	QEncoding = WordEncoder('q')
)
var (
	// errInvalidWord is returned when the input is not a well-formed
	// RFC 2047 encoded-word.
	errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
)
33 // Encode returns the encoded-word form of s. If s is ASCII without special
34 // characters, it is returned unchanged. The provided charset is the IANA
35 // charset name of s. It is case insensitive.
36 func (e WordEncoder
) Encode(charset
, s
string) string {
37 if !needsEncoding(s
) {
40 return e
.encodeWord(charset
, s
)
// needsEncoding reports whether s contains at least one character outside
// the printable ASCII range ' ' through '~', the horizontal tab excepted.
func needsEncoding(s string) bool {
	for _, b := range s {
		if (b < ' ' || b > '~') && b != '\t' {
			return true
		}
	}
	return false
}
52 // encodeWord encodes a string into an encoded-word.
53 func (e WordEncoder
) encodeWord(charset
, s
string) string {
57 e
.openWord(buf
, charset
)
59 e
.bEncode(buf
, charset
, s
)
61 e
.qEncode(buf
, charset
, s
)
const (
	// The maximum length of an encoded-word is 75 characters.
	// See RFC 2047, section 2.
	maxEncodedWordLen = 75
	// maxContentLen is how much content can be encoded, ignoring the header and
	// 2-byte footer.
	maxContentLen = maxEncodedWordLen - len("=?UTF-8?q?") - len("?=")
)
77 var maxBase64Len
= base64
.StdEncoding
.DecodedLen(maxContentLen
)
79 // bEncode encodes s using base64 encoding and writes it to buf.
80 func (e WordEncoder
) bEncode(buf
*bytes
.Buffer
, charset
, s
string) {
81 w
:= base64
.NewEncoder(base64
.StdEncoding
, buf
)
82 // If the charset is not UTF-8 or if the content is short, do not bother
83 // splitting the encoded-word.
84 if !isUTF8(charset
) || base64
.StdEncoding
.EncodedLen(len(s
)) <= maxContentLen
{
90 var currentLen
, last
, runeLen
int
91 for i
:= 0; i
< len(s
); i
+= runeLen
{
92 // Multi-byte characters must not be split across encoded-words.
93 // See RFC 2047, section 5.3.
94 _
, runeLen
= utf8
.DecodeRuneInString(s
[i
:])
96 if currentLen
+runeLen
<= maxBase64Len
{
99 io
.WriteString(w
, s
[last
:i
])
101 e
.splitWord(buf
, charset
)
106 io
.WriteString(w
, s
[last
:])
110 // qEncode encodes s using Q encoding and writes it to buf. It splits the
111 // encoded-words when necessary.
112 func (e WordEncoder
) qEncode(buf
*bytes
.Buffer
, charset
, s
string) {
113 // We only split encoded-words when the charset is UTF-8.
114 if !isUTF8(charset
) {
119 var currentLen
, runeLen
int
120 for i
:= 0; i
< len(s
); i
+= runeLen
{
122 // Multi-byte characters must not be split across encoded-words.
123 // See RFC 2047, section 5.3.
125 if b
>= ' ' && b
<= '~' && b
!= '=' && b
!= '?' && b
!= '_' {
126 runeLen
, encLen
= 1, 1
128 _
, runeLen
= utf8
.DecodeRuneInString(s
[i
:])
132 if currentLen
+encLen
> maxContentLen
{
133 e
.splitWord(buf
, charset
)
136 writeQString(buf
, s
[i
:i
+runeLen
])
141 // writeQString encodes s using Q encoding and writes it to buf.
142 func writeQString(buf
*bytes
.Buffer
, s
string) {
143 for i
:= 0; i
< len(s
); i
++ {
147 case b
>= '!' && b
<= '~' && b
!= '=' && b
!= '?' && b
!= '_':
151 buf
.WriteByte(upperhex
[b
>>4])
152 buf
.WriteByte(upperhex
[b
&0x0f])
157 // openWord writes the beginning of an encoded-word into buf.
158 func (e WordEncoder
) openWord(buf
*bytes
.Buffer
, charset
string) {
159 buf
.WriteString("=?")
160 buf
.WriteString(charset
)
162 buf
.WriteByte(byte(e
))
// closeWord writes the end of an encoded-word, "?=", into buf.
func closeWord(buf *bytes.Buffer) {
	buf.WriteString("?=")
}
171 // splitWord closes the current encoded-word and opens a new one.
172 func (e WordEncoder
) splitWord(buf
*bytes
.Buffer
, charset
string) {
175 e
.openWord(buf
, charset
)
// isUTF8 reports whether charset names UTF-8, ignoring case.
func isUTF8(charset string) bool {
	return strings.EqualFold(charset, "UTF-8")
}
// upperhex maps a value in the range 0-15 to its upper-case hexadecimal
// digit.
const upperhex = "0123456789ABCDEF"
// A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
type WordDecoder struct {
	// CharsetReader, if non-nil, defines a function to generate
	// charset-conversion readers, converting from the provided
	// charset into UTF-8.
	// Charsets are always lower-case. utf-8, iso-8859-1 and us-ascii charsets
	// are handled by default.
	// One of the CharsetReader's result values must be non-nil.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}
195 // Decode decodes an RFC 2047 encoded-word.
196 func (d
*WordDecoder
) Decode(word
string) (string, error
) {
197 // See https://tools.ietf.org/html/rfc2047#section-2 for details.
198 // Our decoder is permissive, we accept empty encoded-text.
199 if len(word
) < 8 ||
!strings
.HasPrefix(word
, "=?") ||
!strings
.HasSuffix(word
, "?=") || strings
.Count(word
, "?") != 4 {
200 return "", errInvalidWord
202 word
= word
[2 : len(word
)-2]
204 // split delimits the first 2 fields
205 split
:= strings
.IndexByte(word
, '?')
207 // split word "UTF-8?q?ascii" into "UTF-8", 'q', and "ascii"
208 charset
:= word
[:split
]
209 if len(charset
) == 0 {
210 return "", errInvalidWord
212 if len(word
) < split
+3 {
213 return "", errInvalidWord
215 encoding
:= word
[split
+1]
216 // the field after split must only be one byte
217 if word
[split
+2] != '?' {
218 return "", errInvalidWord
220 text
:= word
[split
+3:]
222 content
, err
:= decode(encoding
, text
)
230 if err
:= d
.convert(buf
, charset
, content
); err
!= nil {
234 return buf
.String(), nil
237 // DecodeHeader decodes all encoded-words of the given string. It returns an
238 // error if and only if CharsetReader of d returns an error.
239 func (d
*WordDecoder
) DecodeHeader(header
string) (string, error
) {
240 // If there is no encoded-word, returns before creating a buffer.
241 i
:= strings
.Index(header
, "=?")
249 buf
.WriteString(header
[:i
])
252 betweenWords
:= false
254 start
:= strings
.Index(header
, "=?")
258 cur
:= start
+ len("=?")
260 i
:= strings
.Index(header
[cur
:], "?")
264 charset
:= header
[cur
: cur
+i
]
267 if len(header
) < cur
+len("Q??=") {
270 encoding
:= header
[cur
]
273 if header
[cur
] != '?' {
278 j
:= strings
.Index(header
[cur
:], "?=")
282 text
:= header
[cur
: cur
+j
]
283 end
:= cur
+ j
+ len("?=")
285 content
, err
:= decode(encoding
, text
)
288 buf
.WriteString(header
[:start
+2])
289 header
= header
[start
+2:]
293 // Write characters before the encoded-word. White-space and newline
294 // characters separating two encoded-words must be deleted.
295 if start
> 0 && (!betweenWords ||
hasNonWhitespace(header
[:start
])) {
296 buf
.WriteString(header
[:start
])
299 if err
:= d
.convert(buf
, charset
, content
); err
!= nil {
303 header
= header
[end
:]
308 buf
.WriteString(header
)
311 return buf
.String(), nil
314 func decode(encoding
byte, text
string) ([]byte, error
) {
317 return base64
.StdEncoding
.DecodeString(text
)
321 return nil, errInvalidWord
325 func (d
*WordDecoder
) convert(buf
*bytes
.Buffer
, charset
string, content
[]byte) error
{
327 case strings
.EqualFold("utf-8", charset
):
329 case strings
.EqualFold("iso-8859-1", charset
):
330 for _
, c
:= range content
{
331 buf
.WriteRune(rune(c
))
333 case strings
.EqualFold("us-ascii", charset
):
334 for _
, c
:= range content
{
335 if c
>= utf8
.RuneSelf
{
336 buf
.WriteRune(unicode
.ReplacementChar
)
342 if d
.CharsetReader
== nil {
343 return fmt
.Errorf("mime: unhandled charset %q", charset
)
345 r
, err
:= d
.CharsetReader(strings
.ToLower(charset
), bytes
.NewReader(content
))
349 if _
, err
= buf
.ReadFrom(r
); err
!= nil {
// hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
// one byte of non-whitespace.
func hasNonWhitespace(s string) bool {
	for _, b := range s {
		switch b {
		// Encoded-words can only be separated by linear white spaces which does
		// not include vertical tabs (\v).
		case ' ', '\t', '\n', '\r':
		default:
			return true
		}
	}
	return false
}
371 // qDecode decodes a Q encoded string.
372 func qDecode(s
string) ([]byte, error
) {
373 dec
:= make([]byte, len(s
))
375 for i
:= 0; i
< len(s
); i
++ {
381 return nil, errInvalidWord
383 b
, err
:= readHexByte(s
[i
+1], s
[i
+2])
389 case (c
<= '~' && c
>= ' ') || c
== '\n' || c
== '\r' || c
== '\t':
392 return nil, errInvalidWord
400 // readHexByte returns the byte from its quoted-printable representation.
401 func readHexByte(a
, b
byte) (byte, error
) {
404 if hb
, err
= fromHex(a
); err
!= nil {
407 if lb
, err
= fromHex(b
); err
!= nil {
410 return hb
<<4 | lb
, nil
// fromHex returns the numeric value of the hexadecimal digit b, or an error
// if b is not a hexadecimal digit.
func fromHex(b byte) (byte, error) {
	switch {
	case b >= '0' && b <= '9':
		return b - '0', nil
	case b >= 'A' && b <= 'F':
		return b - 'A' + 10, nil
	// Accept badly encoded bytes.
	case b >= 'a' && b <= 'f':
		return b - 'a' + 10, nil
	}
	return 0, fmt.Errorf("mime: invalid hex byte %#02x", b)
}
// bufPool holds *bytes.Buffer values for reuse; New supplies a fresh empty
// buffer when the pool has none available.
var bufPool = sync.Pool{
	New: func() interface{} {
		return new(bytes.Buffer)
	},
}
432 func getBuffer() *bytes
.Buffer
{
433 return bufPool
.Get().(*bytes
.Buffer
)
436 func putBuffer(buf
*bytes
.Buffer
) {
437 if buf
.Len() > 1024 {