1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // This webpage shows layout of YV12 and other YUV formats
6 // http://www.fourcc.org/yuv.php
7 // The actual conversion is best described here
8 // http://en.wikipedia.org/wiki/YUV
9 // An article on optimizing YUV conversion using tables instead of multiplies
10 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf
12 // YV12 is a full plane of Y and a half height, half width chroma planes
13 // YV16 is a full plane of Y and a full height, half width chroma planes
14 // YV24 is a full plane of Y and a full height, full width chroma planes
16 // ARGB pixel format is output, which on little endian is stored as BGRA.
17 // The alpha is set to 255, allowing the application to use RGBA or RGB32.
19 #include "yuv_convert.h"
21 // Header for low level row functions.
23 #include "mozilla/SSE.h"
// The scalers step through the source image in 16.16 fixed point:
// the upper 16 bits are the integer part, the lower 16 bits the fraction.

// Number of fractional bits in a fixed-point value.
const int kFractionBits = 16;

// The value 1.0 expressed in 16.16 fixed point.
const int kFractionMax = 1 << kFractionBits;

// Mask selecting only the fractional bits of a fixed-point value.
const int kFractionMask = kFractionMax - 1;
// Classifies a YUV frame by comparing the dimensions of the luma (Y) plane
// with the dimensions of the chroma (CbCr) planes.
// NOTE(review): only the first parameter and the two branch conditions are
// visible in this excerpt; the remaining parameters, the branch bodies and
// the returned YUVType values are not shown here.
34 NS_GFX_(YUVType
) TypeFromSize(int ywidth
,
// Chroma planes match the luma plane in both width and height
// (presumably the YV24 layout -- confirm against the branch body).
39 if (ywidth
== cbcrwidth
&& yheight
== cbcrheight
) {
// Chroma planes are half the luma width but the full luma height
// (presumably the YV16 layout -- confirm against the branch body).
42 else if (ywidth
/ 2 == cbcrwidth
&& yheight
== cbcrheight
) {
50 // Convert a frame of YUV to 32 bit ARGB.
// Walks the picture rectangle row by row, starting at (pic_x, pic_y) in the
// source planes, and hands each row to one of the FastConvertYUVToRGB32Row
// routines.
// NOTE(review): the full parameter list and the arguments of the row calls
// are not visible in this excerpt.
51 NS_GFX_(void) ConvertYCbCrToRGB32(const uint8
* y_buf
,
// y_shift converts a luma row index into a chroma row index: 1 for YV12,
// 0 for every other type.
63 unsigned int y_shift
= yuv_type
== YV12
? 1 : 0;
// x_shift converts a luma column index into a chroma column index: 0 for
// YV24, 1 for every other type.
64 unsigned int x_shift
= yuv_type
== YV24
? 0 : 1;
65 // Test for SSE because the optimized code uses movntq, which is not part of MMX.
66 bool has_sse
= supports_mmx() && supports_sse();
67 // There is no optimized YV24 SSE routine so we check for this and
68 // fall back to the C code.
69 has_sse
&= yuv_type
!= YV24
;
// True when the picture starts on an odd column of a format other than YV24,
// i.e. the first luma pixel is the second of a pair sharing one chroma column.
70 bool odd_pic_x
= yuv_type
!= YV24
&& pic_x
% 2 != 0;
// Pixel count left for the bulk row routine once the odd leading pixel (if
// any) is accounted for.
71 int x_width
= odd_pic_x
? pic_width
- 1 : pic_width
;
// y is a row index in source coordinates; the output row is relative to pic_y.
73 for (int y
= pic_y
; y
< pic_height
+ pic_y
; ++y
) {
// Destination row start for this source row.
74 uint8
* rgb_row
= rgb_buf
+ (y
- pic_y
) * rgb_pitch
;
// Luma: full-resolution row and column.
75 const uint8
* y_ptr
= y_buf
+ y
* y_pitch
+ pic_x
;
// Chroma: row and column are scaled down by y_shift / x_shift.
76 const uint8
* u_ptr
= u_buf
+ (y
>> y_shift
) * uv_pitch
+ (pic_x
>> x_shift
);
77 const uint8
* v_ptr
= v_buf
+ (y
>> y_shift
) * uv_pitch
+ (pic_x
>> x_shift
);
80 // Handle the single odd pixel manually and use the
81 // fast routines for the remaining.
// The C routine converts the leading pixel and y_ptr is advanced past it.
82 FastConvertYUVToRGB32Row_C(y_ptr
++,
// NOTE(review): presumably the has_sse path -- the guarding condition is not
// visible in this excerpt.
92 FastConvertYUVToRGB32Row(y_ptr
,
// NOTE(review): presumably the fallback path when has_sse is false.
99 FastConvertYUVToRGB32Row_C(y_ptr
,
108 // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
113 // C version does 8 at a time to mimic MMX code
// Vertically blends two source rows into ybuf. Each output byte is
//   (y0 * (256 - source_y_fraction) + y1 * source_y_fraction) >> 8
// so source_y_fraction is an 8-bit weight for the second row (y1_ptr) and
// 256 - source_y_fraction is the weight for the first row (y0_ptr).
// The loop runs while ybuf < ybuf_start + source_width and is tested only
// after a group of eight bytes has been written.
// NOTE(review): the loop header and the pointer advances are not visible in
// this excerpt; presumably all three pointers advance by 8 per iteration, in
// which case up to 7 bytes beyond source_width are read and written --
// confirm callers provide that padding.
114 static void FilterRows_C(uint8
* ybuf
, const uint8
* y0_ptr
, const uint8
* y1_ptr
,
115 int source_width
, int source_y_fraction
) {
// Weight of the second (y1) row.
116 int y1_fraction
= source_y_fraction
;
// Weight of the first (y0) row; the two weights always sum to 256.
117 int y0_fraction
= 256 - y1_fraction
;
// One past the last requested output byte.
118 uint8
* end
= ybuf
+ source_width
;
// Unrolled blend of eight consecutive bytes.
120 ybuf
[0] = (y0_ptr
[0] * y0_fraction
+ y1_ptr
[0] * y1_fraction
) >> 8;
121 ybuf
[1] = (y0_ptr
[1] * y0_fraction
+ y1_ptr
[1] * y1_fraction
) >> 8;
122 ybuf
[2] = (y0_ptr
[2] * y0_fraction
+ y1_ptr
[2] * y1_fraction
) >> 8;
123 ybuf
[3] = (y0_ptr
[3] * y0_fraction
+ y1_ptr
[3] * y1_fraction
) >> 8;
124 ybuf
[4] = (y0_ptr
[4] * y0_fraction
+ y1_ptr
[4] * y1_fraction
) >> 8;
125 ybuf
[5] = (y0_ptr
[5] * y0_fraction
+ y1_ptr
[5] * y1_fraction
) >> 8;
126 ybuf
[6] = (y0_ptr
[6] * y0_fraction
+ y1_ptr
[6] * y1_fraction
) >> 8;
127 ybuf
[7] = (y0_ptr
[7] * y0_fraction
+ y1_ptr
[7] * y1_fraction
) >> 8;
// Post-tested loop: at least one group of eight is always processed.
131 } while (ybuf
< end
);
// Declarations of the SIMD row filters. They take the same parameters as
// FilterRows_C and are only declared when the corresponding
// MOZILLA_MAY_SUPPORT_* macro is defined.
// NOTE(review): no bodies are visible here; presumably they are defined in
// separate translation units -- confirm.
134 #ifdef MOZILLA_MAY_SUPPORT_MMX
135 void FilterRows_MMX(uint8
* ybuf
, const uint8
* y0_ptr
, const uint8
* y1_ptr
,
136 int source_width
, int source_y_fraction
);
139 #ifdef MOZILLA_MAY_SUPPORT_SSE2
140 void FilterRows_SSE2(uint8
* ybuf
, const uint8
* y0_ptr
, const uint8
* y1_ptr
,
141 int source_width
, int source_y_fraction
);
// Dispatches a vertical row blend to an available implementation.
// The SSE2 version is tried first, then the MMX version, each only if it was
// compiled in (MOZILLA_MAY_SUPPORT_*) and the CPU reports support at run
// time; the portable C version is the final fallback.
// All arguments are forwarded unchanged.
// NOTE(review): the statements that end each branch are not visible in this
// excerpt; presumably each SIMD branch returns after its call so that only
// one implementation runs.
144 static inline void FilterRows(uint8
* ybuf
, const uint8
* y0_ptr
,
145 const uint8
* y1_ptr
, int source_width
,
146 int source_y_fraction
) {
147 #ifdef MOZILLA_MAY_SUPPORT_SSE2
148 if (mozilla::supports_sse2()) {
149 FilterRows_SSE2(ybuf
, y0_ptr
, y1_ptr
, source_width
, source_y_fraction
);
154 #ifdef MOZILLA_MAY_SUPPORT_MMX
155 if (mozilla::supports_mmx()) {
156 FilterRows_MMX(ybuf
, y0_ptr
, y1_ptr
, source_width
, source_y_fraction
);
// Portable fallback.
161 FilterRows_C(ybuf
, y0_ptr
, y1_ptr
, source_width
, source_y_fraction
);
165 // Scale a frame of YUV to 32 bit ARGB.
// For every destination row the function picks a source row in 16.16 fixed
// point, optionally blends it with the following source row (vertical
// bilinear filter) into temporary row buffers, and then calls a row routine
// that converts and horizontally scales into the destination.
// Rotated or mirrored views are handled by moving the plane pointers to the
// starting corner and negating source_width / source_height.
// NOTE(review): most of the parameter list and several interior lines are
// not visible in this excerpt.
166 NS_GFX_(void) ScaleYCbCrToRGB32(const uint8
* y_buf
,
179 ScaleFilter filter
) {
// Recorded up front; NOTE(review): presumably consulted for the emms cleanup
// mentioned at the end of the function -- the use is not visible here.
180 bool has_mmx
= supports_mmx();
182 // 4096 allows 3 buffers to fit in 12k.
183 // Helps performance on CPU with 16K L1 cache.
184 // Large enough for 3840x2160 and 30" displays which are 2560x1600.
185 const int kFilterBufferSize
= 4096;
186 // Disable filtering if the screen is too big (to avoid buffer overflows).
187 // This should never happen to regular users: they don't have monitors
188 // wider than 4096 pixels.
189 // TODO(fbarchard): Allow rotated videos to filter.
// The check is on the source width (the row buffers hold source rows), and
// any non-zero view_rotate value also disables filtering.
190 if (source_width
> kFilterBufferSize
|| view_rotate
)
191 filter
= FILTER_NONE
;
// Converts a luma row index into a chroma row index: 1 for YV12, else 0.
193 unsigned int y_shift
= yuv_type
== YV12
? 1 : 0;
194 // Diagram showing origin and direction of source sampling.
200 // Rotations that start at right side of image.
201 if ((view_rotate
== ROTATE_180
) ||
202 (view_rotate
== ROTATE_270
) ||
203 (view_rotate
== MIRROR_ROTATE_0
) ||
204 (view_rotate
== MIRROR_ROTATE_90
)) {
// Move each plane pointer to its last column; the chroma offset uses
// source_width / 2, i.e. it assumes half-width chroma planes.
205 y_buf
+= source_width
- 1;
206 u_buf
+= source_width
/ 2 - 1;
207 v_buf
+= source_width
/ 2 - 1;
// A negative width makes the horizontal step run right-to-left.
208 source_width
= -source_width
;
210 // Rotations that start at bottom of image.
211 if ((view_rotate
== ROTATE_90
) ||
212 (view_rotate
== ROTATE_180
) ||
213 (view_rotate
== MIRROR_ROTATE_90
) ||
214 (view_rotate
== MIRROR_ROTATE_180
)) {
// Move each plane pointer to its last row (chroma row count is
// source_height >> y_shift).
215 y_buf
+= (source_height
- 1) * y_pitch
;
216 u_buf
+= ((source_height
>> y_shift
) - 1) * uv_pitch
;
217 v_buf
+= ((source_height
>> y_shift
) - 1) * uv_pitch
;
// A negative height makes the vertical step run bottom-to-top.
218 source_height
= -source_height
;
221 // Handle zero sized destination.
// Also guards the divisions by width and height below.
222 if (width
== 0 || height
== 0)
// Source step per destination pixel/row, in 16.16 fixed point.
224 int source_dx
= source_width
* kFractionMax
/ width
;
225 int source_dy
= source_height
* kFractionMax
/ height
;
// Chroma step; identical to the luma step unless the view is rotated by
// 90/270 degrees (see below).
226 int source_dx_uv
= source_dx
;
228 if ((view_rotate
== ROTATE_90
) ||
229 (view_rotate
== ROTATE_270
)) {
// NOTE(review): the lines preceding this assignment are not visible;
// presumably source_width and source_height are being swapped via a temp.
234 source_height
= source_width
;
236 int original_dx
= source_dx
;
237 int original_dy
= source_dy
;
// For a quarter-turn, stepping one destination pixel horizontally moves
// whole rows in the source, so the horizontal step becomes the integer part
// of the vertical step multiplied by the plane pitch (kept in 16.16).
// Luma and chroma differ here because their pitches differ.
238 source_dx
= ((original_dy
>> kFractionBits
) * y_pitch
) << kFractionBits
;
239 source_dx_uv
= ((original_dy
>> kFractionBits
) * uv_pitch
) << kFractionBits
;
240 source_dy
= original_dx
;
241 if (view_rotate
== ROTATE_90
) {
244 source_height
= -source_height
;
251 // Need padding because FilterRows() will write 1 to 16 extra pixels
252 // after the end for SSE2 version.
// Storage for three row buffers (Y, U, V) plus 16 bytes of alignment slack
// in front and 16 bytes of padding behind.
253 uint8 yuvbuf
[16 + kFilterBufferSize
* 3 + 16];
// Rounds the buffer address up to the next 16-byte boundary.
// NOTE(review): the declaration receiving this value is not visible here;
// presumably it initializes ybuf, which is used below.
255 reinterpret_cast<uint8
*>(reinterpret_cast<uintptr_t>(yuvbuf
+ 15) & ~15);
// The U and V row buffers follow the Y row buffer back to back.
256 uint8
* ubuf
= ybuf
+ kFilterBufferSize
;
257 uint8
* vbuf
= ubuf
+ kFilterBufferSize
;
258 // TODO(fbarchard): Fixed point math is off by 1 on negatives.
// Vertical source step per destination row, in 16.16 fixed point.
259 int yscale_fixed
= (source_height
<< kFractionBits
) / height
;
261 // TODO(fbarchard): Split this into separate function for better efficiency.
// One iteration per destination row.
262 for (int y
= 0; y
< height
; ++y
) {
263 uint8
* dest_pixel
= rgb_buf
+ y
* rgb_pitch
;
// Source row position for this destination row, in 16.16 fixed point.
264 int source_y_subpixel
= (y
* yscale_fixed
);
// When shrinking by a factor of two or more, sample half a source row
// further down.
265 if (yscale_fixed
>= (kFractionMax
* 2)) {
266 source_y_subpixel
+= kFractionMax
/ 2; // For 1/2 or less, center filter.
// Integer source row.
268 int source_y
= source_y_subpixel
>> kFractionBits
;
// Current source row and the row after it, for each plane.
270 const uint8
* y0_ptr
= y_buf
+ source_y
* y_pitch
;
271 const uint8
* y1_ptr
= y0_ptr
+ y_pitch
;
273 const uint8
* u0_ptr
= u_buf
+ (source_y
>> y_shift
) * uv_pitch
;
274 const uint8
* u1_ptr
= u0_ptr
+ uv_pitch
;
275 const uint8
* v0_ptr
= v_buf
+ (source_y
>> y_shift
) * uv_pitch
;
276 const uint8
* v1_ptr
= v0_ptr
+ uv_pitch
;
278 // vertical scaler uses 16.8 fixed point
// Keep only the top 8 bits of the 16-bit fraction (range 0..255); this is
// the blend weight passed to FilterRows.
279 int source_y_fraction
= (source_y_subpixel
& kFractionMask
) >> 8;
// Same for chroma, after scaling the position down by y_shift.
280 int source_uv_fraction
=
281 ((source_y_subpixel
>> y_shift
) & kFractionMask
) >> 8;
// Default: read straight from the source planes.
// NOTE(review): presumably these are redirected to ybuf/ubuf/vbuf inside the
// filtered branch below -- those assignments are not visible in this excerpt.
283 const uint8
* y_ptr
= y0_ptr
;
284 const uint8
* u_ptr
= u0_ptr
;
285 const uint8
* v_ptr
= v0_ptr
;
286 // Apply vertical filtering if necessary.
287 // TODO(fbarchard): Remove memcpy when not necessary.
288 if (filter
& mozilla::gfx::FILTER_BILINEAR_V
) {
// Blend only when the frame is actually scaled vertically, the position
// falls between two rows, and a following source row exists.
289 if (yscale_fixed
!= kFractionMax
&&
290 source_y_fraction
&& ((source_y
+ 1) < source_height
)) {
291 FilterRows(ybuf
, y0_ptr
, y1_ptr
, source_width
, source_y_fraction
);
// Otherwise copy the single source row unchanged.
293 memcpy(ybuf
, y0_ptr
, source_width
);
// Duplicate the last pixel one position past the row end.
// NOTE(review): source_width == kFilterBufferSize passes the size check at
// the top, and then index source_width is the first byte of ubuf rather
// than padding.
296 ybuf
[source_width
] = ybuf
[source_width
-1];
// Chroma row length: half the luma width, rounded up.
297 int uv_source_width
= (source_width
+ 1) / 2;
// Same blend-or-copy decision for the two chroma rows.
298 if (yscale_fixed
!= kFractionMax
&&
299 source_uv_fraction
&&
300 (((source_y
>> y_shift
) + 1) < (source_height
>> y_shift
))) {
301 FilterRows(ubuf
, u0_ptr
, u1_ptr
, uv_source_width
, source_uv_fraction
);
302 FilterRows(vbuf
, v0_ptr
, v1_ptr
, uv_source_width
, source_uv_fraction
);
304 memcpy(ubuf
, u0_ptr
, uv_source_width
);
305 memcpy(vbuf
, v0_ptr
, uv_source_width
);
// Duplicate the last chroma sample one position past the row end.
309 ubuf
[uv_source_width
] = ubuf
[uv_source_width
- 1];
310 vbuf
[uv_source_width
] = vbuf
[uv_source_width
- 1];
// Choose the row routine: plain conversion when the horizontal step is
// exactly 1.0, the linear-interpolating scaler when horizontal bilinear
// filtering is requested, otherwise one of the specialized scalers below.
312 if (source_dx
== kFractionMax
) { // Not scaled
313 FastConvertYUVToRGB32Row(y_ptr
, u_ptr
, v_ptr
,
315 } else if (filter
& FILTER_BILINEAR_H
) {
316 LinearScaleYUVToRGB32Row(y_ptr
, u_ptr
, v_ptr
,
317 dest_pixel
, width
, source_dx
);
319 // Specialized scalers and rotation.
// The hand-written SSE variants are only compiled for 32-bit x86 MSVC builds.
320 #if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_MSC_VER) && defined(_M_IX86)
321 if(mozilla::supports_sse()) {
// Exact 2x horizontal enlargement.
322 if (width
== (source_width
* 2)) {
323 DoubleYUVToRGB32Row_SSE(y_ptr
, u_ptr
, v_ptr
,
// No fractional bits in the step: whole-pixel stride.
325 } else if ((source_dx
& kFractionMask
) == 0) {
326 // Scaling by integer scale factor. ie half.
327 ConvertYUVToRGB32Row_SSE(y_ptr
, u_ptr
, v_ptr
,
329 source_dx
>> kFractionBits
);
// Luma and chroma steps only differ after the 90/270 degree adjustment above.
330 } else if (source_dx_uv
== source_dx
) { // Not rotated.
331 ScaleYUVToRGB32Row(y_ptr
, u_ptr
, v_ptr
,
332 dest_pixel
, width
, source_dx
);
// Rotated: separate integer strides for luma and chroma.
334 RotateConvertYUVToRGB32Row_SSE(y_ptr
, u_ptr
, v_ptr
,
336 source_dx
>> kFractionBits
,
337 source_dx_uv
>> kFractionBits
);
// NOTE(review): presumably the branch taken when SSE is not supported at run
// time -- the enclosing else is not visible in this excerpt.
341 ScaleYUVToRGB32Row_C(y_ptr
, u_ptr
, v_ptr
,
342 dest_pixel
, width
, source_dx
);
// NOTE(review): presumably the #else branch for builds without the
// specialized SSE scalers -- the directive is not visible in this excerpt.
346 ScaleYUVToRGB32Row(y_ptr
, u_ptr
, v_ptr
,
347 dest_pixel
, width
, source_dx
);
351 // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms.
357 } // namespace mozilla