2016-08-05 Vladimir Makarov <vmakarov@redhat.com>
[official-gcc.git] / libgo / go / mime / encodedword.go
blobc3ca4bacd130700ccbc04a0efa00a94fd646cecb
1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package mime
7 import (
8 "bytes"
9 "encoding/base64"
10 "errors"
11 "fmt"
12 "io"
13 "strings"
14 "sync"
15 "unicode"
16 "unicode/utf8"
// WordEncoder is an encoder for RFC 2047 encoded-words. Its underlying byte
// is the encoding letter written into every encoded-word it produces.
type WordEncoder byte

// Encoding schemes available to a WordEncoder.
const (
	// BEncoding selects the Base64 encoding scheme defined by RFC 2045.
	BEncoding WordEncoder = 'b'
	// QEncoding selects the Q-encoding scheme defined by RFC 2047.
	QEncoding WordEncoder = 'q'
)

// errInvalidWord reports input that is not a well-formed encoded-word.
var errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
33 // Encode returns the encoded-word form of s. If s is ASCII without special
34 // characters, it is returned unchanged. The provided charset is the IANA
35 // charset name of s. It is case insensitive.
36 func (e WordEncoder) Encode(charset, s string) string {
37 if !needsEncoding(s) {
38 return s
40 return e.encodeWord(charset, s)
// needsEncoding reports whether s contains any character other than a tab
// or an ASCII character in the range ' ' through '~'.
func needsEncoding(s string) bool {
	for _, r := range s {
		if r == '\t' {
			continue
		}
		if r < ' ' || r > '~' {
			return true
		}
	}
	return false
}
52 // encodeWord encodes a string into an encoded-word.
53 func (e WordEncoder) encodeWord(charset, s string) string {
54 buf := getBuffer()
55 defer putBuffer(buf)
57 e.openWord(buf, charset)
58 if e == BEncoding {
59 e.bEncode(buf, charset, s)
60 } else {
61 e.qEncode(buf, charset, s)
63 closeWord(buf)
65 return buf.String()
// maxEncodedWordLen is the maximum length, in characters, of an encoded-word.
// See RFC 2047, section 2.
const maxEncodedWordLen = 75

// maxContentLen is the number of characters left for encoded content in one
// encoded-word after subtracting the "=?UTF-8?q?" header and the 2-byte "?="
// footer.
const maxContentLen = maxEncodedWordLen - len("=?UTF-8?q?") - len("?=")

// maxBase64Len is the maximum number of decoded bytes that correspond to
// maxContentLen bytes of base64-encoded text.
var maxBase64Len = base64.StdEncoding.DecodedLen(maxContentLen)
79 // bEncode encodes s using base64 encoding and writes it to buf.
80 func (e WordEncoder) bEncode(buf *bytes.Buffer, charset, s string) {
81 w := base64.NewEncoder(base64.StdEncoding, buf)
82 // If the charset is not UTF-8 or if the content is short, do not bother
83 // splitting the encoded-word.
84 if !isUTF8(charset) || base64.StdEncoding.EncodedLen(len(s)) <= maxContentLen {
85 io.WriteString(w, s)
86 w.Close()
87 return
90 var currentLen, last, runeLen int
91 for i := 0; i < len(s); i += runeLen {
92 // Multi-byte characters must not be split across encoded-words.
93 // See RFC 2047, section 5.3.
94 _, runeLen = utf8.DecodeRuneInString(s[i:])
96 if currentLen+runeLen <= maxBase64Len {
97 currentLen += runeLen
98 } else {
99 io.WriteString(w, s[last:i])
100 w.Close()
101 e.splitWord(buf, charset)
102 last = i
103 currentLen = runeLen
106 io.WriteString(w, s[last:])
107 w.Close()
110 // qEncode encodes s using Q encoding and writes it to buf. It splits the
111 // encoded-words when necessary.
112 func (e WordEncoder) qEncode(buf *bytes.Buffer, charset, s string) {
113 // We only split encoded-words when the charset is UTF-8.
114 if !isUTF8(charset) {
115 writeQString(buf, s)
116 return
119 var currentLen, runeLen int
120 for i := 0; i < len(s); i += runeLen {
121 b := s[i]
122 // Multi-byte characters must not be split across encoded-words.
123 // See RFC 2047, section 5.3.
124 var encLen int
125 if b >= ' ' && b <= '~' && b != '=' && b != '?' && b != '_' {
126 runeLen, encLen = 1, 1
127 } else {
128 _, runeLen = utf8.DecodeRuneInString(s[i:])
129 encLen = 3 * runeLen
132 if currentLen+encLen > maxContentLen {
133 e.splitWord(buf, charset)
134 currentLen = 0
136 writeQString(buf, s[i:i+runeLen])
137 currentLen += encLen
141 // writeQString encodes s using Q encoding and writes it to buf.
142 func writeQString(buf *bytes.Buffer, s string) {
143 for i := 0; i < len(s); i++ {
144 switch b := s[i]; {
145 case b == ' ':
146 buf.WriteByte('_')
147 case b >= '!' && b <= '~' && b != '=' && b != '?' && b != '_':
148 buf.WriteByte(b)
149 default:
150 buf.WriteByte('=')
151 buf.WriteByte(upperhex[b>>4])
152 buf.WriteByte(upperhex[b&0x0f])
157 // openWord writes the beginning of an encoded-word into buf.
158 func (e WordEncoder) openWord(buf *bytes.Buffer, charset string) {
159 buf.WriteString("=?")
160 buf.WriteString(charset)
161 buf.WriteByte('?')
162 buf.WriteByte(byte(e))
163 buf.WriteByte('?')
// closeWord appends the encoded-word terminator "?=" to buf.
func closeWord(buf *bytes.Buffer) {
	buf.WriteByte('?')
	buf.WriteByte('=')
}
171 // splitWord closes the current encoded-word and opens a new one.
172 func (e WordEncoder) splitWord(buf *bytes.Buffer, charset string) {
173 closeWord(buf)
174 buf.WriteByte(' ')
175 e.openWord(buf, charset)
// isUTF8 reports whether charset names UTF-8, ignoring case.
func isUTF8(charset string) bool {
	const utf8Name = "UTF-8"
	return strings.EqualFold(utf8Name, charset)
}
// upperhex holds the upper-case hexadecimal digits, indexed by nibble value.
const upperhex = "0123456789ABCDEF"
// WordDecoder decodes MIME headers that contain RFC 2047 encoded-words.
type WordDecoder struct {
	// CharsetReader, when non-nil, is called to obtain a reader that
	// converts input from the named charset into UTF-8.
	// The charset name it receives is always lower-case. The utf-8,
	// iso-8859-1 and us-ascii charsets are handled by default.
	// One of the CharsetReader's result values must be non-nil.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}
195 // Decode decodes an RFC 2047 encoded-word.
196 func (d *WordDecoder) Decode(word string) (string, error) {
197 if !strings.HasPrefix(word, "=?") || !strings.HasSuffix(word, "?=") || strings.Count(word, "?") != 4 {
198 return "", errInvalidWord
200 word = word[2 : len(word)-2]
202 // split delimits the first 2 fields
203 split := strings.IndexByte(word, '?')
204 // the field after split must only be one byte
205 if word[split+2] != '?' {
206 return "", errInvalidWord
209 // split word "UTF-8?q?ascii" into "UTF-8", 'q', and "ascii"
210 charset := word[:split]
211 encoding := word[split+1]
212 text := word[split+3:]
214 content, err := decode(encoding, text)
215 if err != nil {
216 return "", err
219 buf := getBuffer()
220 defer putBuffer(buf)
222 if err := d.convert(buf, charset, content); err != nil {
223 return "", err
226 return buf.String(), nil
229 // DecodeHeader decodes all encoded-words of the given string. It returns an
230 // error if and only if CharsetReader of d returns an error.
231 func (d *WordDecoder) DecodeHeader(header string) (string, error) {
232 // If there is no encoded-word, returns before creating a buffer.
233 i := strings.Index(header, "=?")
234 if i == -1 {
235 return header, nil
238 buf := getBuffer()
239 defer putBuffer(buf)
241 buf.WriteString(header[:i])
242 header = header[i:]
244 betweenWords := false
245 for {
246 start := strings.Index(header, "=?")
247 if start == -1 {
248 break
250 cur := start + len("=?")
252 i := strings.Index(header[cur:], "?")
253 if i == -1 {
254 break
256 charset := header[cur : cur+i]
257 cur += i + len("?")
259 if len(header) < cur+len("Q??=") {
260 break
262 encoding := header[cur]
263 cur++
265 if header[cur] != '?' {
266 break
268 cur++
270 j := strings.Index(header[cur:], "?=")
271 if j == -1 {
272 break
274 text := header[cur : cur+j]
275 end := cur + j + len("?=")
277 content, err := decode(encoding, text)
278 if err != nil {
279 betweenWords = false
280 buf.WriteString(header[:start+2])
281 header = header[start+2:]
282 continue
285 // Write characters before the encoded-word. White-space and newline
286 // characters separating two encoded-words must be deleted.
287 if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) {
288 buf.WriteString(header[:start])
291 if err := d.convert(buf, charset, content); err != nil {
292 return "", err
295 header = header[end:]
296 betweenWords = true
299 if len(header) > 0 {
300 buf.WriteString(header)
303 return buf.String(), nil
306 func decode(encoding byte, text string) ([]byte, error) {
307 switch encoding {
308 case 'B', 'b':
309 return base64.StdEncoding.DecodeString(text)
310 case 'Q', 'q':
311 return qDecode(text)
312 default:
313 return nil, errInvalidWord
317 func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error {
318 switch {
319 case strings.EqualFold("utf-8", charset):
320 buf.Write(content)
321 case strings.EqualFold("iso-8859-1", charset):
322 for _, c := range content {
323 buf.WriteRune(rune(c))
325 case strings.EqualFold("us-ascii", charset):
326 for _, c := range content {
327 if c >= utf8.RuneSelf {
328 buf.WriteRune(unicode.ReplacementChar)
329 } else {
330 buf.WriteByte(c)
333 default:
334 if d.CharsetReader == nil {
335 return fmt.Errorf("mime: unhandled charset %q", charset)
337 r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
338 if err != nil {
339 return err
341 if _, err = buf.ReadFrom(r); err != nil {
342 return err
345 return nil
// hasNonWhitespace reports whether s (assumed to be ASCII) contains any
// character other than a space, tab, line feed or carriage return.
//
// Encoded-words can only be separated by linear white space, which does not
// include vertical tabs (\v).
func hasNonWhitespace(s string) bool {
	for _, r := range s {
		if r != ' ' && r != '\t' && r != '\n' && r != '\r' {
			return true
		}
	}
	return false
}
363 // qDecode decodes a Q encoded string.
364 func qDecode(s string) ([]byte, error) {
365 dec := make([]byte, len(s))
366 n := 0
367 for i := 0; i < len(s); i++ {
368 switch c := s[i]; {
369 case c == '_':
370 dec[n] = ' '
371 case c == '=':
372 if i+2 >= len(s) {
373 return nil, errInvalidWord
375 b, err := readHexByte(s[i+1], s[i+2])
376 if err != nil {
377 return nil, err
379 dec[n] = b
380 i += 2
381 case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t':
382 dec[n] = c
383 default:
384 return nil, errInvalidWord
389 return dec[:n], nil
392 // readHexByte returns the byte from its quoted-printable representation.
393 func readHexByte(a, b byte) (byte, error) {
394 var hb, lb byte
395 var err error
396 if hb, err = fromHex(a); err != nil {
397 return 0, err
399 if lb, err = fromHex(b); err != nil {
400 return 0, err
402 return hb<<4 | lb, nil
// fromHex returns the value of the hexadecimal digit b, or an error when b
// is not a hexadecimal digit.
func fromHex(b byte) (byte, error) {
	if '0' <= b && b <= '9' {
		return b - '0', nil
	}
	if 'A' <= b && b <= 'F' {
		return b - 'A' + 10, nil
	}
	// Accept lower-case digits from badly encoded input.
	if 'a' <= b && b <= 'f' {
		return b - 'a' + 10, nil
	}
	return 0, fmt.Errorf("mime: invalid hex byte %#02x", b)
}
// bufPool holds reusable *bytes.Buffer values; an empty buffer is created
// when the pool has none to hand out.
var bufPool = sync.Pool{
	New: func() interface{} {
		return &bytes.Buffer{}
	},
}
424 func getBuffer() *bytes.Buffer {
425 return bufPool.Get().(*bytes.Buffer)
428 func putBuffer(buf *bytes.Buffer) {
429 if buf.Len() > 1024 {
430 return
432 buf.Reset()
433 bufPool.Put(buf)