1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package jpeg implements a JPEG image decoder and encoder.
7 // JPEG is defined in ITU-T T.81: http://www.w3.org/Graphics/JPEG/itu-t81.pdf.
13 "image/internal/imageutil"
17 // TODO(nigeltao): fix up the doc comment style so that sentences start with
18 // the name of the type or function that they annotate.
// A FormatError reports that the input is not a valid JPEG.
type FormatError string

// Error implements the error interface. The message is the FormatError's own
// text prefixed with "invalid JPEG format: ".
func (e FormatError) Error() string {
	const prefix = "invalid JPEG format: "
	return prefix + string(e)
}
// An UnsupportedError reports that the input uses a valid but unimplemented JPEG feature.
type UnsupportedError string

// Error implements the error interface. The message is the UnsupportedError's
// own text prefixed with "unsupported JPEG feature: ".
func (e UnsupportedError) Error() string {
	const prefix = "unsupported JPEG feature: "
	return prefix + string(e)
}

// errUnsupportedSubsamplingRatio is the shared error value returned when a
// frame's luma/chroma sampling factors are not handled by this decoder.
var errUnsupportedSubsamplingRatio = UnsupportedError("luma/chroma subsampling ratio")
// component is a component specification, as specified in section B.2.2.
type component struct {
	h  int   // Horizontal sampling factor.
	v  int   // Vertical sampling factor.
	c  uint8 // Component identifier.
	tq uint8 // Quantization table destination selector.
}
51 sof0Marker
= 0xc0 // Start Of Frame (Baseline Sequential).
52 sof1Marker
= 0xc1 // Start Of Frame (Extended Sequential).
53 sof2Marker
= 0xc2 // Start Of Frame (Progressive).
54 dhtMarker
= 0xc4 // Define Huffman Table.
55 rst0Marker
= 0xd0 // ReSTart (0).
56 rst7Marker
= 0xd7 // ReSTart (7).
57 soiMarker
= 0xd8 // Start Of Image.
58 eoiMarker
= 0xd9 // End Of Image.
59 sosMarker
= 0xda // Start Of Scan.
60 dqtMarker
= 0xdb // Define Quantization Table.
61 driMarker
= 0xdd // Define Restart Interval.
62 comMarker
= 0xfe // COMment.
63 // "APPlication specific" markers aren't part of the JPEG spec per se,
64 // but in practice, their use is described at
65 // http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html
71 // See http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#Adobe
73 adobeTransformUnknown
= 0
74 adobeTransformYCbCr
= 1
75 adobeTransformYCbCrK
= 2
78 // unzig maps from the zig-zag ordering to the natural ordering. For example,
79 // unzig[3] is the column and row of the fourth element in zig-zag order. The
80 // value is 16, which means first column (16%8 == 0) and third row (16/8 == 2).
81 var unzig
= [blockSize
]int{
82 0, 1, 8, 16, 9, 2, 3, 10,
83 17, 24, 32, 25, 18, 11, 4, 5,
84 12, 19, 26, 33, 40, 48, 41, 34,
85 27, 20, 13, 6, 7, 14, 21, 28,
86 35, 42, 49, 56, 57, 50, 43, 36,
87 29, 22, 15, 23, 30, 37, 44, 51,
88 58, 59, 52, 45, 38, 31, 39, 46,
89 53, 60, 61, 54, 47, 55, 62, 63,
92 // Deprecated: Reader is deprecated.
93 type Reader
interface {
98 // bits holds the unprocessed bits that have been taken from the byte-stream.
99 // The n least significant bits of a form the unread bits, to be read in MSB to LSB order.
102 a
uint32 // accumulator.
103 m
uint32 // mask. m==1<<(n-1) when n>0, with m==0 when n==0.
104 n
int32 // the number of unread bits in a.
107 type decoder
struct {
110 // bytes is a byte buffer, similar to a bufio.Reader, except that it
111 // has to be able to unread more than 1 byte, due to byte stuffing.
112 // Byte stuffing is specified in section F.1.2.3.
114 // buf[i:j] are the buffered bytes read from the underlying
115 // io.Reader that haven't yet been passed further on.
118 // nUnreadable is the number of bytes to back up i after
119 // overshooting. It can be 0, 1 or 2.
129 ri
int // Restart Interval.
132 // As per section 4.5, there are four modes of operation (selected by the
133 // SOF? markers): sequential DCT, progressive DCT, lossless and
134 // hierarchical, although this implementation does not support the latter
135 // two non-DCT modes. Sequential DCT is further split into baseline and
136 // extended, as per section 4.11.
141 adobeTransformValid
bool
143 eobRun
uint16 // End-of-Band run, specified in section G.1.2.2.
145 comp
[maxComponents
]component
146 progCoeffs
[maxComponents
][]block
// Saved state between progressive-mode scans.
147 huff
[maxTc
+ 1][maxTh
+ 1]huffman
148 quant
[maxTq
+ 1]block
// Quantization tables, in zig-zag order.
149 tmp
[2 * blockSize
]byte
152 // fill fills up the d.bytes.buf buffer from the underlying io.Reader. It
153 // should only be called when there are no unread bytes in d.bytes.
154 func (d
*decoder
) fill() error
{
155 if d
.bytes
.i
!= d
.bytes
.j
{
156 panic("jpeg: fill called when unread bytes exist")
158 // Move the last 2 bytes to the start of the buffer, in case we need
159 // to call unreadByteStuffedByte.
161 d
.bytes
.buf
[0] = d
.bytes
.buf
[d
.bytes
.j
-2]
162 d
.bytes
.buf
[1] = d
.bytes
.buf
[d
.bytes
.j
-1]
163 d
.bytes
.i
, d
.bytes
.j
= 2, 2
165 // Fill in the rest of the buffer.
166 n
, err
:= d
.r
.Read(d
.bytes
.buf
[d
.bytes
.j
:])
174 // unreadByteStuffedByte undoes the most recent readByteStuffedByte call,
175 // giving a byte of data back from d.bits to d.bytes. The Huffman look-up table
176 // requires at least 8 bits for look-up, which means that Huffman decoding can
177 // sometimes overshoot and read one or two too many bytes. Two-byte overshoot
178 // can happen when expecting to read a 0xff 0x00 byte-stuffed byte.
179 func (d
*decoder
) unreadByteStuffedByte() {
180 d
.bytes
.i
-= d
.bytes
.nUnreadable
181 d
.bytes
.nUnreadable
= 0
189 // readByte returns the next byte, whether buffered or not buffered. It does
190 // not care about byte stuffing.
191 func (d
*decoder
) readByte() (x
byte, err error
) {
192 for d
.bytes
.i
== d
.bytes
.j
{
193 if err
= d
.fill(); err
!= nil {
197 x
= d
.bytes
.buf
[d
.bytes
.i
]
199 d
.bytes
.nUnreadable
= 0
203 // errMissingFF00 means that readByteStuffedByte encountered an 0xff byte (a
204 // marker byte) that wasn't the expected byte-stuffed sequence 0xff, 0x00.
205 var errMissingFF00
= FormatError("missing 0xff00 sequence")
207 // readByteStuffedByte is like readByte but is for byte-stuffed Huffman data.
208 func (d
*decoder
) readByteStuffedByte() (x
byte, err error
) {
209 // Take the fast path if d.bytes.buf contains at least two bytes.
210 if d
.bytes
.i
+2 <= d
.bytes
.j
{
211 x
= d
.bytes
.buf
[d
.bytes
.i
]
213 d
.bytes
.nUnreadable
= 1
217 if d
.bytes
.buf
[d
.bytes
.i
] != 0x00 {
218 return 0, errMissingFF00
221 d
.bytes
.nUnreadable
= 2
225 d
.bytes
.nUnreadable
= 0
227 x
, err
= d
.readByte()
231 d
.bytes
.nUnreadable
= 1
236 x
, err
= d
.readByte()
240 d
.bytes
.nUnreadable
= 2
242 return 0, errMissingFF00
247 // readFull reads exactly len(p) bytes into p. It does not care about byte stuffing.
249 func (d
*decoder
) readFull(p
[]byte) error
{
250 // Unread the overshot bytes, if any.
251 if d
.bytes
.nUnreadable
!= 0 {
253 d
.unreadByteStuffedByte()
255 d
.bytes
.nUnreadable
= 0
259 n
:= copy(p
, d
.bytes
.buf
[d
.bytes
.i
:d
.bytes
.j
])
265 if err
:= d
.fill(); err
!= nil {
267 err
= io
.ErrUnexpectedEOF
275 // ignore ignores the next n bytes.
276 func (d
*decoder
) ignore(n
int) error
{
277 // Unread the overshot bytes, if any.
278 if d
.bytes
.nUnreadable
!= 0 {
280 d
.unreadByteStuffedByte()
282 d
.bytes
.nUnreadable
= 0
286 m
:= d
.bytes
.j
- d
.bytes
.i
295 if err
:= d
.fill(); err
!= nil {
297 err
= io
.ErrUnexpectedEOF
305 // Specified in section B.2.2.
306 func (d
*decoder
) processSOF(n
int) error
{
308 return FormatError("multiple SOF markers")
311 case 6 + 3*1: // Grayscale image.
313 case 6 + 3*3: // YCbCr or RGB image.
315 case 6 + 3*4: // YCbCrK or CMYK image.
318 return UnsupportedError("number of components")
320 if err
:= d
.readFull(d
.tmp
[:n
]); err
!= nil {
323 // We only support 8-bit precision.
325 return UnsupportedError("precision")
327 d
.height
= int(d
.tmp
[1])<<8 + int(d
.tmp
[2])
328 d
.width
= int(d
.tmp
[3])<<8 + int(d
.tmp
[4])
329 if int(d
.tmp
[5]) != d
.nComp
{
330 return FormatError("SOF has wrong length")
333 for i
:= 0; i
< d
.nComp
; i
++ {
334 d
.comp
[i
].c
= d
.tmp
[6+3*i
]
335 // Section B.2.2 states that "the value of C_i shall be different from
336 // the values of C_1 through C_(i-1)".
337 for j
:= 0; j
< i
; j
++ {
338 if d
.comp
[i
].c
== d
.comp
[j
].c
{
339 return FormatError("repeated component identifier")
343 d
.comp
[i
].tq
= d
.tmp
[8+3*i
]
344 if d
.comp
[i
].tq
> maxTq
{
345 return FormatError("bad Tq value")
349 h
, v
:= int(hv
>>4), int(hv
&0x0f)
350 if h
< 1 ||
4 < h || v
< 1 ||
4 < v
{
351 return FormatError("luma/chroma subsampling ratio")
353 if h
== 3 || v
== 3 {
354 return errUnsupportedSubsamplingRatio
358 // If a JPEG image has only one component, section A.2 says "this data
359 // is non-interleaved by definition" and section A.2.2 says "[in this
360 // case...] the order of data units within a scan shall be left-to-right
361 // and top-to-bottom... regardless of the values of H_1 and V_1". Section
362 // 4.8.2 also says "[for non-interleaved data], the MCU is defined to be
363 // one data unit". Similarly, section A.1.1 explains that it is the ratio
364 // of H_i to max_j(H_j) that matters, and similarly for V. For grayscale
365 // images, H_1 is the maximum H_j for all components j, so that ratio is
366 // always 1. The component's (h, v) is effectively always (1, 1): even if
367 // the nominal (h, v) is (2, 1), a 20x5 image is encoded in three 8x8
368 // MCUs, not two 16x8 MCUs.
372 // For YCbCr images, we only support 4:4:4, 4:4:0, 4:2:2, 4:2:0,
373 // 4:1:1 or 4:1:0 chroma subsampling ratios. This implies that the
374 // (h, v) values for the Y component are either (1, 1), (1, 2),
375 // (2, 1), (2, 2), (4, 1) or (4, 2), and the Y component's values
376 // must be a multiple of the Cb and Cr component's values. We also
377 // assume that the two chroma components have the same subsampling ratio.
381 // We have already verified, above, that h and v are both
382 // either 1, 2 or 4, so invalid (h, v) combinations are those
385 return errUnsupportedSubsamplingRatio
388 if d
.comp
[0].h%h
!= 0 || d
.comp
[0].v%v
!= 0 {
389 return errUnsupportedSubsamplingRatio
392 if d
.comp
[1].h
!= h || d
.comp
[1].v
!= v
{
393 return errUnsupportedSubsamplingRatio
398 // For 4-component images (either CMYK or YCbCrK), we only support two
399 // hv vectors: [0x11 0x11 0x11 0x11] and [0x22 0x11 0x11 0x22].
400 // Theoretically, 4-component JPEG images could mix and match hv values
401 // but in practice, those two combinations are the only ones in use,
402 // and it simplifies the applyBlack code below if we can assume that:
403 // - for CMYK, the C and K channels have full samples, and if the M
404 // and Y channels subsample, they subsample both horizontally and
406 // - for YCbCrK, the Y and K channels have full samples.
409 if hv
!= 0x11 && hv
!= 0x22 {
410 return errUnsupportedSubsamplingRatio
414 return errUnsupportedSubsamplingRatio
417 if d
.comp
[0].h
!= h || d
.comp
[0].v
!= v
{
418 return errUnsupportedSubsamplingRatio
429 // Specified in section B.2.4.1.
430 func (d
*decoder
) processDQT(n
int) error
{
434 x
, err
:= d
.readByte()
440 return FormatError("bad Tq value")
444 return FormatError("bad Pq value")
450 if err
:= d
.readFull(d
.tmp
[:blockSize
]); err
!= nil {
453 for i
:= range d
.quant
[tq
] {
454 d
.quant
[tq
][i
] = int32(d
.tmp
[i
])
461 if err
:= d
.readFull(d
.tmp
[:2*blockSize
]); err
!= nil {
464 for i
:= range d
.quant
[tq
] {
465 d
.quant
[tq
][i
] = int32(d
.tmp
[2*i
])<<8 |
int32(d
.tmp
[2*i
+1])
470 return FormatError("DQT has wrong length")
475 // Specified in section B.2.4.4.
476 func (d
*decoder
) processDRI(n
int) error
{
478 return FormatError("DRI has wrong length")
480 if err
:= d
.readFull(d
.tmp
[:2]); err
!= nil {
483 d
.ri
= int(d
.tmp
[0])<<8 + int(d
.tmp
[1])
487 func (d
*decoder
) processApp0Marker(n
int) error
{
491 if err
:= d
.readFull(d
.tmp
[:5]); err
!= nil {
496 d
.jfif
= d
.tmp
[0] == 'J' && d
.tmp
[1] == 'F' && d
.tmp
[2] == 'I' && d
.tmp
[3] == 'F' && d
.tmp
[4] == '\x00'
504 func (d
*decoder
) processApp14Marker(n
int) error
{
508 if err
:= d
.readFull(d
.tmp
[:12]); err
!= nil {
513 if d
.tmp
[0] == 'A' && d
.tmp
[1] == 'd' && d
.tmp
[2] == 'o' && d
.tmp
[3] == 'b' && d
.tmp
[4] == 'e' {
514 d
.adobeTransformValid
= true
515 d
.adobeTransform
= d
.tmp
[11]
524 // decode reads a JPEG image from r and returns it as an image.Image.
525 func (d
*decoder
) decode(r io
.Reader
, configOnly
bool) (image
.Image
, error
) {
528 // Check for the Start Of Image marker.
529 if err
:= d
.readFull(d
.tmp
[:2]); err
!= nil {
532 if d
.tmp
[0] != 0xff || d
.tmp
[1] != soiMarker
{
533 return nil, FormatError("missing SOI marker")
536 // Process the remaining segments until the End Of Image marker.
538 err
:= d
.readFull(d
.tmp
[:2])
542 for d
.tmp
[0] != 0xff {
543 // Strictly speaking, this is a format error. However, libjpeg is
544 // liberal in what it accepts. As of version 9, next_marker in
545 // jdmarker.c treats this as a warning (JWRN_EXTRANEOUS_DATA) and
546 // continues to decode the stream. Even before next_marker sees
547 // extraneous data, jpeg_fill_bit_buffer in jdhuff.c reads as many
548 // bytes as it can, possibly past the end of a scan's data. It
549 // effectively puts back any markers that it overscanned (e.g. an
550 // "\xff\xd9" EOI marker), but it does not put back non-marker data,
551 // and thus it can silently ignore a small number of extraneous
552 // non-marker bytes before next_marker has a chance to see them (and
555 // We are therefore also liberal in what we accept. Extraneous data
556 // is silently ignored.
558 // This is similar to, but not exactly the same as, the restart
559 // mechanism within a scan (the RST[0-7] markers).
561 // Note that extraneous 0xff bytes in e.g. SOS data are escaped as
562 // "\xff\x00", and so are detected a little further down below.
564 d
.tmp
[1], err
= d
.readByte()
571 // Treat "\xff\x00" as extraneous data.
575 // Section B.1.1.2 says, "Any marker may optionally be preceded by any
576 // number of fill bytes, which are bytes assigned code X'FF'".
577 marker
, err
= d
.readByte()
582 if marker
== eoiMarker
{ // End Of Image.
585 if rst0Marker
<= marker
&& marker
<= rst7Marker
{
586 // Figures B.2 and B.16 of the specification suggest that restart markers should
587 // only occur between Entropy Coded Segments and not after the final ECS.
588 // However, some encoders may generate incorrect JPEGs with a final restart
589 // marker. That restart marker will be seen here instead of inside the processSOS
590 // method, and is ignored as a harmless error. Restart markers have no extra data,
591 // so we check for this before we read the 16-bit length of the segment.
595 // Read the 16-bit length of the segment. The value includes the 2 bytes for the
596 // length itself, so we subtract 2 to get the number of remaining bytes.
597 if err
= d
.readFull(d
.tmp
[:2]); err
!= nil {
600 n
:= int(d
.tmp
[0])<<8 + int(d
.tmp
[1]) - 2
602 return nil, FormatError("short segment length")
606 case sof0Marker
, sof1Marker
, sof2Marker
:
607 d
.baseline
= marker
== sof0Marker
608 d
.progressive
= marker
== sof2Marker
609 err
= d
.processSOF(n
)
610 if configOnly
&& d
.jfif
{
617 err
= d
.processDHT(n
)
623 err
= d
.processDQT(n
)
629 err
= d
.processSOS(n
)
634 err
= d
.processDRI(n
)
637 err
= d
.processApp0Marker(n
)
639 err
= d
.processApp14Marker(n
)
641 if app0Marker
<= marker
&& marker
<= app15Marker || marker
== comMarker
{
643 } else if marker
< 0xc0 { // See Table B.1 "Marker code assignments".
644 err
= FormatError("unknown marker")
646 err
= UnsupportedError("unknown marker")
655 if err
:= d
.reconstructProgressiveImage(); err
!= nil {
663 if d
.blackPix
!= nil {
664 return d
.applyBlack()
665 } else if d
.isRGB() {
666 return d
.convertToRGB()
670 return nil, FormatError("missing SOS marker")
673 // applyBlack combines d.img3 and d.blackPix into a CMYK image. The formula
674 // used depends on whether the JPEG image is stored as CMYK or YCbCrK,
675 // indicated by the APP14 (Adobe) metadata.
677 // Adobe CMYK JPEG images are inverted, where 255 means no ink instead of full
678 // ink, so we apply "v = 255 - v" at various points. Note that a double
679 // inversion is a no-op, so inversions might be implicit in the code below.
680 func (d
*decoder
) applyBlack() (image
.Image
, error
) {
681 if !d
.adobeTransformValid
{
682 return nil, UnsupportedError("unknown color model: 4-component JPEG doesn't have Adobe APP14 metadata")
685 // If the 4-component JPEG image isn't explicitly marked as "Unknown (RGB
687 // http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#Adobe
688 // we assume that it is YCbCrK. This matches libjpeg's jdapimin.c.
689 if d
.adobeTransform
!= adobeTransformUnknown
{
690 // Convert the YCbCr part of the YCbCrK to RGB, invert the RGB to get
691 // CMY, and patch in the original K. The RGB to CMY inversion cancels
692 // out the 'Adobe inversion' described in the applyBlack doc comment
693 // above, so in practice, only the fourth channel (black) is inverted.
694 bounds
:= d
.img3
.Bounds()
695 img
:= image
.NewRGBA(bounds
)
696 imageutil
.DrawYCbCr(img
, bounds
, d
.img3
, bounds
.Min
)
697 for iBase
, y
:= 0, bounds
.Min
.Y
; y
< bounds
.Max
.Y
; iBase
, y
= iBase
+img
.Stride
, y
+1 {
698 for i
, x
:= iBase
+3, bounds
.Min
.X
; x
< bounds
.Max
.X
; i
, x
= i
+4, x
+1 {
699 img
.Pix
[i
] = 255 - d
.blackPix
[(y
-bounds
.Min
.Y
)*d
.blackStride
+(x
-bounds
.Min
.X
)]
709 // The first three channels (cyan, magenta, yellow) of the CMYK
710 // were decoded into d.img3, but each channel was decoded into a separate
711 // []byte slice, and some channels may be subsampled. We interleave the
712 // separate channels into an image.CMYK's single []byte slice containing 4
713 // contiguous bytes per pixel.
714 bounds
:= d
.img3
.Bounds()
715 img
:= image
.NewCMYK(bounds
)
717 translations
:= [4]struct {
721 {d
.img3
.Y
, d
.img3
.YStride
},
722 {d
.img3
.Cb
, d
.img3
.CStride
},
723 {d
.img3
.Cr
, d
.img3
.CStride
},
724 {d
.blackPix
, d
.blackStride
},
726 for t
, translation
:= range translations
{
727 subsample
:= d
.comp
[t
].h
!= d
.comp
[0].h || d
.comp
[t
].v
!= d
.comp
[0].v
728 for iBase
, y
:= 0, bounds
.Min
.Y
; y
< bounds
.Max
.Y
; iBase
, y
= iBase
+img
.Stride
, y
+1 {
729 sy
:= y
- bounds
.Min
.Y
733 for i
, x
:= iBase
+t
, bounds
.Min
.X
; x
< bounds
.Max
.X
; i
, x
= i
+4, x
+1 {
734 sx
:= x
- bounds
.Min
.X
738 img
.Pix
[i
] = 255 - translation
.src
[sy
*translation
.stride
+sx
]
745 func (d
*decoder
) isRGB() bool {
749 if d
.adobeTransformValid
&& d
.adobeTransform
== adobeTransformUnknown
{
750 // http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#Adobe
751 // says that 0 means Unknown (and in practice RGB) and 1 means YCbCr.
754 return d
.comp
[0].c
== 'R' && d
.comp
[1].c
== 'G' && d
.comp
[2].c
== 'B'
757 func (d
*decoder
) convertToRGB() (image
.Image
, error
) {
758 cScale
:= d
.comp
[0].h
/ d
.comp
[1].h
759 bounds
:= d
.img3
.Bounds()
760 img
:= image
.NewRGBA(bounds
)
761 for y
:= bounds
.Min
.Y
; y
< bounds
.Max
.Y
; y
++ {
762 po
:= img
.PixOffset(bounds
.Min
.X
, y
)
763 yo
:= d
.img3
.YOffset(bounds
.Min
.X
, y
)
764 co
:= d
.img3
.COffset(bounds
.Min
.X
, y
)
765 for i
, iMax
:= 0, bounds
.Max
.X
-bounds
.Min
.X
; i
< iMax
; i
++ {
766 img
.Pix
[po
+4*i
+0] = d
.img3
.Y
[yo
+i
]
767 img
.Pix
[po
+4*i
+1] = d
.img3
.Cb
[co
+i
/cScale
]
768 img
.Pix
[po
+4*i
+2] = d
.img3
.Cr
[co
+i
/cScale
]
769 img
.Pix
[po
+4*i
+3] = 255
775 // Decode reads a JPEG image from r and returns it as an image.Image.
776 func Decode(r io
.Reader
) (image
.Image
, error
) {
778 return d
.decode(r
, false)
781 // DecodeConfig returns the color model and dimensions of a JPEG image without
782 // decoding the entire image.
783 func DecodeConfig(r io
.Reader
) (image
.Config
, error
) {
785 if _
, err
:= d
.decode(r
, true); err
!= nil {
786 return image
.Config
{}, err
791 ColorModel
: color
.GrayModel
,
796 cm
:= color
.YCbCrModel
807 ColorModel
: color
.CMYKModel
,
812 return image
.Config
{}, FormatError("missing SOF marker")
816 image
.RegisterFormat("jpeg", "\xff\xd8", Decode
, DecodeConfig
)