Avoid is_constant calls in vectorizable_bswap
[official-gcc.git] / libgo / go / mime / encodedword.go
blob99eb432f54e172dcf65c9a5a54ba45a869f26c5b
1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package mime
7 import (
8 "bytes"
9 "encoding/base64"
10 "errors"
11 "fmt"
12 "io"
13 "strings"
14 "sync"
15 "unicode"
16 "unicode/utf8"
// A WordEncoder is an RFC 2047 encoded-word encoder. Its underlying byte is
// the encoding letter that is written into the encoded-word header.
type WordEncoder byte

const (
	// BEncoding represents Base64 encoding scheme as defined by RFC 2045.
	BEncoding WordEncoder = 'b'
	// QEncoding represents the Q-encoding scheme as defined by RFC 2047.
	QEncoding WordEncoder = 'q'
)
// errInvalidWord is returned when a string is not a well-formed RFC 2047
// encoded-word or its encoded text cannot be decoded.
var errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
33 // Encode returns the encoded-word form of s. If s is ASCII without special
34 // characters, it is returned unchanged. The provided charset is the IANA
35 // charset name of s. It is case insensitive.
36 func (e WordEncoder) Encode(charset, s string) string {
37 if !needsEncoding(s) {
38 return s
40 return e.encodeWord(charset, s)
// needsEncoding reports whether s contains any rune other than a tab or a
// printable ASCII character (' ' through '~').
func needsEncoding(s string) bool {
	for _, r := range s {
		if r == '\t' {
			continue
		}
		if r < ' ' || r > '~' {
			return true
		}
	}
	return false
}
52 // encodeWord encodes a string into an encoded-word.
53 func (e WordEncoder) encodeWord(charset, s string) string {
54 buf := getBuffer()
55 defer putBuffer(buf)
57 e.openWord(buf, charset)
58 if e == BEncoding {
59 e.bEncode(buf, charset, s)
60 } else {
61 e.qEncode(buf, charset, s)
63 closeWord(buf)
65 return buf.String()
const (
	// maxEncodedWordLen is the maximum length of an encoded-word: 75
	// characters. See RFC 2047, section 2.
	maxEncodedWordLen = 75
	// maxContentLen is how much content can be encoded, ignoring the header
	// and 2-byte footer.
	maxContentLen = maxEncodedWordLen - len("=?UTF-8?q?") - len("?=")
)

// maxBase64Len is the number of raw bytes whose base64 form fits within
// maxContentLen encoded characters.
var maxBase64Len = base64.StdEncoding.DecodedLen(maxContentLen)
79 // bEncode encodes s using base64 encoding and writes it to buf.
80 func (e WordEncoder) bEncode(buf *bytes.Buffer, charset, s string) {
81 w := base64.NewEncoder(base64.StdEncoding, buf)
82 // If the charset is not UTF-8 or if the content is short, do not bother
83 // splitting the encoded-word.
84 if !isUTF8(charset) || base64.StdEncoding.EncodedLen(len(s)) <= maxContentLen {
85 io.WriteString(w, s)
86 w.Close()
87 return
90 var currentLen, last, runeLen int
91 for i := 0; i < len(s); i += runeLen {
92 // Multi-byte characters must not be split across encoded-words.
93 // See RFC 2047, section 5.3.
94 _, runeLen = utf8.DecodeRuneInString(s[i:])
96 if currentLen+runeLen <= maxBase64Len {
97 currentLen += runeLen
98 } else {
99 io.WriteString(w, s[last:i])
100 w.Close()
101 e.splitWord(buf, charset)
102 last = i
103 currentLen = runeLen
106 io.WriteString(w, s[last:])
107 w.Close()
110 // qEncode encodes s using Q encoding and writes it to buf. It splits the
111 // encoded-words when necessary.
112 func (e WordEncoder) qEncode(buf *bytes.Buffer, charset, s string) {
113 // We only split encoded-words when the charset is UTF-8.
114 if !isUTF8(charset) {
115 writeQString(buf, s)
116 return
119 var currentLen, runeLen int
120 for i := 0; i < len(s); i += runeLen {
121 b := s[i]
122 // Multi-byte characters must not be split across encoded-words.
123 // See RFC 2047, section 5.3.
124 var encLen int
125 if b >= ' ' && b <= '~' && b != '=' && b != '?' && b != '_' {
126 runeLen, encLen = 1, 1
127 } else {
128 _, runeLen = utf8.DecodeRuneInString(s[i:])
129 encLen = 3 * runeLen
132 if currentLen+encLen > maxContentLen {
133 e.splitWord(buf, charset)
134 currentLen = 0
136 writeQString(buf, s[i:i+runeLen])
137 currentLen += encLen
141 // writeQString encodes s using Q encoding and writes it to buf.
142 func writeQString(buf *bytes.Buffer, s string) {
143 for i := 0; i < len(s); i++ {
144 switch b := s[i]; {
145 case b == ' ':
146 buf.WriteByte('_')
147 case b >= '!' && b <= '~' && b != '=' && b != '?' && b != '_':
148 buf.WriteByte(b)
149 default:
150 buf.WriteByte('=')
151 buf.WriteByte(upperhex[b>>4])
152 buf.WriteByte(upperhex[b&0x0f])
157 // openWord writes the beginning of an encoded-word into buf.
158 func (e WordEncoder) openWord(buf *bytes.Buffer, charset string) {
159 buf.WriteString("=?")
160 buf.WriteString(charset)
161 buf.WriteByte('?')
162 buf.WriteByte(byte(e))
163 buf.WriteByte('?')
// closeWord appends the "?=" terminator of an encoded-word to buf.
func closeWord(buf *bytes.Buffer) {
	buf.WriteByte('?')
	buf.WriteByte('=')
}
171 // splitWord closes the current encoded-word and opens a new one.
172 func (e WordEncoder) splitWord(buf *bytes.Buffer, charset string) {
173 closeWord(buf)
174 buf.WriteByte(' ')
175 e.openWord(buf, charset)
// isUTF8 reports whether charset names UTF-8, ignoring case.
func isUTF8(charset string) bool {
	const utf8Name = "UTF-8"
	return strings.EqualFold(utf8Name, charset)
}
// upperhex maps a nibble value (0-15) to its upper-case hexadecimal digit.
const upperhex = "0123456789ABCDEF"
// A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
type WordDecoder struct {
	// CharsetReader, if non-nil, is called to obtain a reader that converts
	// input from the given charset into UTF-8. The charset name it receives
	// is always lower-case. It is not consulted for utf-8, iso-8859-1 and
	// us-ascii, which are handled by default.
	// One of the CharsetReader's result values must be non-nil.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}
195 // Decode decodes an RFC 2047 encoded-word.
196 func (d *WordDecoder) Decode(word string) (string, error) {
197 // See https://tools.ietf.org/html/rfc2047#section-2 for details.
198 // Our decoder is permissive, we accept empty encoded-text.
199 if len(word) < 8 || !strings.HasPrefix(word, "=?") || !strings.HasSuffix(word, "?=") || strings.Count(word, "?") != 4 {
200 return "", errInvalidWord
202 word = word[2 : len(word)-2]
204 // split delimits the first 2 fields
205 split := strings.IndexByte(word, '?')
207 // split word "UTF-8?q?ascii" into "UTF-8", 'q', and "ascii"
208 charset := word[:split]
209 if len(charset) == 0 {
210 return "", errInvalidWord
212 if len(word) < split+3 {
213 return "", errInvalidWord
215 encoding := word[split+1]
216 // the field after split must only be one byte
217 if word[split+2] != '?' {
218 return "", errInvalidWord
220 text := word[split+3:]
222 content, err := decode(encoding, text)
223 if err != nil {
224 return "", err
227 buf := getBuffer()
228 defer putBuffer(buf)
230 if err := d.convert(buf, charset, content); err != nil {
231 return "", err
234 return buf.String(), nil
237 // DecodeHeader decodes all encoded-words of the given string. It returns an
238 // error if and only if CharsetReader of d returns an error.
239 func (d *WordDecoder) DecodeHeader(header string) (string, error) {
240 // If there is no encoded-word, returns before creating a buffer.
241 i := strings.Index(header, "=?")
242 if i == -1 {
243 return header, nil
246 buf := getBuffer()
247 defer putBuffer(buf)
249 buf.WriteString(header[:i])
250 header = header[i:]
252 betweenWords := false
253 for {
254 start := strings.Index(header, "=?")
255 if start == -1 {
256 break
258 cur := start + len("=?")
260 i := strings.Index(header[cur:], "?")
261 if i == -1 {
262 break
264 charset := header[cur : cur+i]
265 cur += i + len("?")
267 if len(header) < cur+len("Q??=") {
268 break
270 encoding := header[cur]
271 cur++
273 if header[cur] != '?' {
274 break
276 cur++
278 j := strings.Index(header[cur:], "?=")
279 if j == -1 {
280 break
282 text := header[cur : cur+j]
283 end := cur + j + len("?=")
285 content, err := decode(encoding, text)
286 if err != nil {
287 betweenWords = false
288 buf.WriteString(header[:start+2])
289 header = header[start+2:]
290 continue
293 // Write characters before the encoded-word. White-space and newline
294 // characters separating two encoded-words must be deleted.
295 if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) {
296 buf.WriteString(header[:start])
299 if err := d.convert(buf, charset, content); err != nil {
300 return "", err
303 header = header[end:]
304 betweenWords = true
307 if len(header) > 0 {
308 buf.WriteString(header)
311 return buf.String(), nil
314 func decode(encoding byte, text string) ([]byte, error) {
315 switch encoding {
316 case 'B', 'b':
317 return base64.StdEncoding.DecodeString(text)
318 case 'Q', 'q':
319 return qDecode(text)
320 default:
321 return nil, errInvalidWord
325 func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error {
326 switch {
327 case strings.EqualFold("utf-8", charset):
328 buf.Write(content)
329 case strings.EqualFold("iso-8859-1", charset):
330 for _, c := range content {
331 buf.WriteRune(rune(c))
333 case strings.EqualFold("us-ascii", charset):
334 for _, c := range content {
335 if c >= utf8.RuneSelf {
336 buf.WriteRune(unicode.ReplacementChar)
337 } else {
338 buf.WriteByte(c)
341 default:
342 if d.CharsetReader == nil {
343 return fmt.Errorf("mime: unhandled charset %q", charset)
345 r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
346 if err != nil {
347 return err
349 if _, err = buf.ReadFrom(r); err != nil {
350 return err
353 return nil
// hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
// one character other than space, tab, line feed or carriage return.
//
// Encoded-words can only be separated by linear white space, which does not
// include vertical tabs (\v), so '\v' counts as non-whitespace here.
func hasNonWhitespace(s string) bool {
	for _, r := range s {
		if r != ' ' && r != '\t' && r != '\n' && r != '\r' {
			return true
		}
	}
	return false
}
371 // qDecode decodes a Q encoded string.
372 func qDecode(s string) ([]byte, error) {
373 dec := make([]byte, len(s))
374 n := 0
375 for i := 0; i < len(s); i++ {
376 switch c := s[i]; {
377 case c == '_':
378 dec[n] = ' '
379 case c == '=':
380 if i+2 >= len(s) {
381 return nil, errInvalidWord
383 b, err := readHexByte(s[i+1], s[i+2])
384 if err != nil {
385 return nil, err
387 dec[n] = b
388 i += 2
389 case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t':
390 dec[n] = c
391 default:
392 return nil, errInvalidWord
397 return dec[:n], nil
400 // readHexByte returns the byte from its quoted-printable representation.
401 func readHexByte(a, b byte) (byte, error) {
402 var hb, lb byte
403 var err error
404 if hb, err = fromHex(a); err != nil {
405 return 0, err
407 if lb, err = fromHex(b); err != nil {
408 return 0, err
410 return hb<<4 | lb, nil
// fromHex returns the value (0-15) of the hexadecimal digit b. Lower-case
// digits are accepted as well, to tolerate badly encoded input. Any other
// byte produces an error.
func fromHex(b byte) (byte, error) {
	if '0' <= b && b <= '9' {
		return b - '0', nil
	}
	if 'A' <= b && b <= 'F' {
		return b - 'A' + 10, nil
	}
	// Accept badly encoded bytes.
	if 'a' <= b && b <= 'f' {
		return b - 'a' + 10, nil
	}
	return 0, fmt.Errorf("mime: invalid hex byte %#02x", b)
}
// bufPool holds reusable *bytes.Buffer values handed out by getBuffer and
// returned by putBuffer.
var bufPool = sync.Pool{
	New: func() interface{} {
		return &bytes.Buffer{}
	},
}
432 func getBuffer() *bytes.Buffer {
433 return bufPool.Get().(*bytes.Buffer)
436 func putBuffer(buf *bytes.Buffer) {
437 if buf.Len() > 1024 {
438 return
440 buf.Reset()
441 bufPool.Put(buf)