1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // This webpage shows layout of YV12 and other YUV formats
6 // http://www.fourcc.org/yuv.php
7 // The actual conversion is best described here
8 // http://en.wikipedia.org/wiki/YUV
9 // An article on optimizing YUV conversion using tables instead of multiplies
10 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf
12 // YV12 is a full plane of Y plus half height, half width chroma planes
13 // YV16 is a full plane of Y plus full height, half width chroma planes
14 // YV24 is a full plane of Y plus full height, full width chroma planes
16 // ARGB pixel format is output, which on little endian is stored as BGRA.
17 // The alpha is set to 255, allowing the application to use RGBA or RGB32.
19 #include "yuv_convert.h"
21 // Header for low level row functions.
23 #include "mozilla/SSE.h"
25 #ifdef HAVE_YCBCR_TO_RGB565
26 void __attribute((noinline
)) yv12_to_rgb565_neon(uint16
*dst
, const uint8
*y
, const uint8
*u
, const uint8
*v
, int n
, int oddflag
);
// 16.16 fixed point arithmetic: the low kFractionBits bits of a value hold
// the fraction, the bits above them hold the integer part.
const int kFractionBits = 16;
// The value 1.0 expressed in 16.16 fixed point.
const int kFractionMax = 1 << kFractionBits;
// Mask that keeps only the fractional bits of a 16.16 fixed point value.
const int kFractionMask = ((1 << kFractionBits) - 1);
39 // Convert a frame of YUV to 16 bit RGB565.
40 NS_GFX_(void) ConvertYCbCrToRGB565(const uint8
* y_buf
,
53 #ifdef HAVE_YCBCR_TO_RGB565
54 for (int i
= 0; i
< pic_height
; i
++) {
55 yv12_to_rgb565_neon((uint16
*)rgb_buf
+ pic_width
* i
,
57 u_buf
+ uv_pitch
* (i
/ 2),
58 v_buf
+ uv_pitch
* (i
/ 2),
65 // Convert a frame of YUV to 32 bit ARGB.
66 NS_GFX_(void) ConvertYCbCrToRGB32(const uint8
* y_buf
,
78 unsigned int y_shift
= yuv_type
== YV12
? 1 : 0;
79 unsigned int x_shift
= yuv_type
== YV24
? 0 : 1;
80 // Test for SSE because the optimized code uses movntq, which is not part of MMX.
81 bool has_sse
= supports_mmx() && supports_sse();
82 // There is no optimized YV24 SSE routine so we check for this and
83 // fall back to the C code.
84 has_sse
&= yuv_type
!= YV24
;
85 bool odd_pic_x
= yuv_type
!= YV24
&& pic_x
% 2 != 0;
86 int x_width
= odd_pic_x
? pic_width
- 1 : pic_width
;
88 for (int y
= pic_y
; y
< pic_height
+ pic_y
; ++y
) {
89 uint8
* rgb_row
= rgb_buf
+ (y
- pic_y
) * rgb_pitch
;
90 const uint8
* y_ptr
= y_buf
+ y
* y_pitch
+ pic_x
;
91 const uint8
* u_ptr
= u_buf
+ (y
>> y_shift
) * uv_pitch
+ (pic_x
>> x_shift
);
92 const uint8
* v_ptr
= v_buf
+ (y
>> y_shift
) * uv_pitch
+ (pic_x
>> x_shift
);
95 // Handle the single odd pixel manually and use the
96 // fast routines for the remaining.
97 FastConvertYUVToRGB32Row_C(y_ptr
++,
107 FastConvertYUVToRGB32Row(y_ptr
,
114 FastConvertYUVToRGB32Row_C(y_ptr
,
123 // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
128 // C version does 8 at a time to mimic MMX code
129 static void FilterRows_C(uint8
* ybuf
, const uint8
* y0_ptr
, const uint8
* y1_ptr
,
130 int source_width
, int source_y_fraction
) {
131 int y1_fraction
= source_y_fraction
;
132 int y0_fraction
= 256 - y1_fraction
;
133 uint8
* end
= ybuf
+ source_width
;
135 ybuf
[0] = (y0_ptr
[0] * y0_fraction
+ y1_ptr
[0] * y1_fraction
) >> 8;
136 ybuf
[1] = (y0_ptr
[1] * y0_fraction
+ y1_ptr
[1] * y1_fraction
) >> 8;
137 ybuf
[2] = (y0_ptr
[2] * y0_fraction
+ y1_ptr
[2] * y1_fraction
) >> 8;
138 ybuf
[3] = (y0_ptr
[3] * y0_fraction
+ y1_ptr
[3] * y1_fraction
) >> 8;
139 ybuf
[4] = (y0_ptr
[4] * y0_fraction
+ y1_ptr
[4] * y1_fraction
) >> 8;
140 ybuf
[5] = (y0_ptr
[5] * y0_fraction
+ y1_ptr
[5] * y1_fraction
) >> 8;
141 ybuf
[6] = (y0_ptr
[6] * y0_fraction
+ y1_ptr
[6] * y1_fraction
) >> 8;
142 ybuf
[7] = (y0_ptr
[7] * y0_fraction
+ y1_ptr
[7] * y1_fraction
) >> 8;
146 } while (ybuf
< end
);
149 #ifdef MOZILLA_MAY_SUPPORT_MMX
150 void FilterRows_MMX(uint8
* ybuf
, const uint8
* y0_ptr
, const uint8
* y1_ptr
,
151 int source_width
, int source_y_fraction
);
154 #ifdef MOZILLA_MAY_SUPPORT_SSE2
155 void FilterRows_SSE2(uint8
* ybuf
, const uint8
* y0_ptr
, const uint8
* y1_ptr
,
156 int source_width
, int source_y_fraction
);
159 static inline void FilterRows(uint8
* ybuf
, const uint8
* y0_ptr
,
160 const uint8
* y1_ptr
, int source_width
,
161 int source_y_fraction
) {
162 #ifdef MOZILLA_MAY_SUPPORT_SSE2
163 if (mozilla::supports_sse2()) {
164 FilterRows_SSE2(ybuf
, y0_ptr
, y1_ptr
, source_width
, source_y_fraction
);
169 #ifdef MOZILLA_MAY_SUPPORT_MMX
170 if (mozilla::supports_mmx()) {
171 FilterRows_MMX(ybuf
, y0_ptr
, y1_ptr
, source_width
, source_y_fraction
);
176 FilterRows_C(ybuf
, y0_ptr
, y1_ptr
, source_width
, source_y_fraction
);
180 // Scale a frame of YUV to 32 bit ARGB.
181 NS_GFX_(void) ScaleYCbCrToRGB32(const uint8
* y_buf
,
194 ScaleFilter filter
) {
195 bool has_mmx
= supports_mmx();
197 // 4096 allows 3 buffers to fit in 12k.
198 // Helps performance on CPU with 16K L1 cache.
199 // Large enough for 3840x2160 and 30" displays which are 2560x1600.
200 const int kFilterBufferSize
= 4096;
201 // Disable filtering if the screen is too big (to avoid buffer overflows).
202 // This should never happen to regular users: they don't have monitors
203 // wider than 4096 pixels.
204 // TODO(fbarchard): Allow rotated videos to filter.
205 if (source_width
> kFilterBufferSize
|| view_rotate
)
206 filter
= FILTER_NONE
;
208 unsigned int y_shift
= yuv_type
== YV12
? 1 : 0;
209 // Diagram showing origin and direction of source sampling.
215 // Rotations that start at right side of image.
216 if ((view_rotate
== ROTATE_180
) ||
217 (view_rotate
== ROTATE_270
) ||
218 (view_rotate
== MIRROR_ROTATE_0
) ||
219 (view_rotate
== MIRROR_ROTATE_90
)) {
220 y_buf
+= source_width
- 1;
221 u_buf
+= source_width
/ 2 - 1;
222 v_buf
+= source_width
/ 2 - 1;
223 source_width
= -source_width
;
225 // Rotations that start at bottom of image.
226 if ((view_rotate
== ROTATE_90
) ||
227 (view_rotate
== ROTATE_180
) ||
228 (view_rotate
== MIRROR_ROTATE_90
) ||
229 (view_rotate
== MIRROR_ROTATE_180
)) {
230 y_buf
+= (source_height
- 1) * y_pitch
;
231 u_buf
+= ((source_height
>> y_shift
) - 1) * uv_pitch
;
232 v_buf
+= ((source_height
>> y_shift
) - 1) * uv_pitch
;
233 source_height
= -source_height
;
236 // Handle zero sized destination.
237 if (width
== 0 || height
== 0)
239 int source_dx
= source_width
* kFractionMax
/ width
;
240 int source_dy
= source_height
* kFractionMax
/ height
;
241 int source_dx_uv
= source_dx
;
243 if ((view_rotate
== ROTATE_90
) ||
244 (view_rotate
== ROTATE_270
)) {
249 source_height
= source_width
;
251 int original_dx
= source_dx
;
252 int original_dy
= source_dy
;
253 source_dx
= ((original_dy
>> kFractionBits
) * y_pitch
) << kFractionBits
;
254 source_dx_uv
= ((original_dy
>> kFractionBits
) * uv_pitch
) << kFractionBits
;
255 source_dy
= original_dx
;
256 if (view_rotate
== ROTATE_90
) {
259 source_height
= -source_height
;
266 // Need padding because FilterRows() will write 1 to 16 extra pixels
267 // after the end for SSE2 version.
268 uint8 yuvbuf
[16 + kFilterBufferSize
* 3 + 16];
270 reinterpret_cast<uint8
*>(reinterpret_cast<PRUptrdiff
>(yuvbuf
+ 15) & ~15);
271 uint8
* ubuf
= ybuf
+ kFilterBufferSize
;
272 uint8
* vbuf
= ubuf
+ kFilterBufferSize
;
273 // TODO(fbarchard): Fixed point math is off by 1 on negatives.
274 int yscale_fixed
= (source_height
<< kFractionBits
) / height
;
276 // TODO(fbarchard): Split this into separate function for better efficiency.
277 for (int y
= 0; y
< height
; ++y
) {
278 uint8
* dest_pixel
= rgb_buf
+ y
* rgb_pitch
;
279 int source_y_subpixel
= (y
* yscale_fixed
);
280 if (yscale_fixed
>= (kFractionMax
* 2)) {
281 source_y_subpixel
+= kFractionMax
/ 2; // For 1/2 or less, center filter.
283 int source_y
= source_y_subpixel
>> kFractionBits
;
285 const uint8
* y0_ptr
= y_buf
+ source_y
* y_pitch
;
286 const uint8
* y1_ptr
= y0_ptr
+ y_pitch
;
288 const uint8
* u0_ptr
= u_buf
+ (source_y
>> y_shift
) * uv_pitch
;
289 const uint8
* u1_ptr
= u0_ptr
+ uv_pitch
;
290 const uint8
* v0_ptr
= v_buf
+ (source_y
>> y_shift
) * uv_pitch
;
291 const uint8
* v1_ptr
= v0_ptr
+ uv_pitch
;
293 // vertical scaler uses 16.8 fixed point
294 int source_y_fraction
= (source_y_subpixel
& kFractionMask
) >> 8;
295 int source_uv_fraction
=
296 ((source_y_subpixel
>> y_shift
) & kFractionMask
) >> 8;
298 const uint8
* y_ptr
= y0_ptr
;
299 const uint8
* u_ptr
= u0_ptr
;
300 const uint8
* v_ptr
= v0_ptr
;
301 // Apply vertical filtering if necessary.
302 // TODO(fbarchard): Remove memcpy when not necessary.
303 if (filter
& mozilla::gfx::FILTER_BILINEAR_V
) {
304 if (yscale_fixed
!= kFractionMax
&&
305 source_y_fraction
&& ((source_y
+ 1) < source_height
)) {
306 FilterRows(ybuf
, y0_ptr
, y1_ptr
, source_width
, source_y_fraction
);
308 memcpy(ybuf
, y0_ptr
, source_width
);
311 ybuf
[source_width
] = ybuf
[source_width
-1];
312 int uv_source_width
= (source_width
+ 1) / 2;
313 if (yscale_fixed
!= kFractionMax
&&
314 source_uv_fraction
&&
315 (((source_y
>> y_shift
) + 1) < (source_height
>> y_shift
))) {
316 FilterRows(ubuf
, u0_ptr
, u1_ptr
, uv_source_width
, source_uv_fraction
);
317 FilterRows(vbuf
, v0_ptr
, v1_ptr
, uv_source_width
, source_uv_fraction
);
319 memcpy(ubuf
, u0_ptr
, uv_source_width
);
320 memcpy(vbuf
, v0_ptr
, uv_source_width
);
324 ubuf
[uv_source_width
] = ubuf
[uv_source_width
- 1];
325 vbuf
[uv_source_width
] = vbuf
[uv_source_width
- 1];
327 if (source_dx
== kFractionMax
) { // Not scaled
328 FastConvertYUVToRGB32Row(y_ptr
, u_ptr
, v_ptr
,
330 } else if (filter
& FILTER_BILINEAR_H
) {
331 LinearScaleYUVToRGB32Row(y_ptr
, u_ptr
, v_ptr
,
332 dest_pixel
, width
, source_dx
);
334 // Specialized scalers and rotation.
335 #if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_MSC_VER) && defined(_M_IX86)
336 if(mozilla::supports_sse()) {
337 if (width
== (source_width
* 2)) {
338 DoubleYUVToRGB32Row_SSE(y_ptr
, u_ptr
, v_ptr
,
340 } else if ((source_dx
& kFractionMask
) == 0) {
341 // Scaling by integer scale factor. ie half.
342 ConvertYUVToRGB32Row_SSE(y_ptr
, u_ptr
, v_ptr
,
344 source_dx
>> kFractionBits
);
345 } else if (source_dx_uv
== source_dx
) { // Not rotated.
346 ScaleYUVToRGB32Row(y_ptr
, u_ptr
, v_ptr
,
347 dest_pixel
, width
, source_dx
);
349 RotateConvertYUVToRGB32Row_SSE(y_ptr
, u_ptr
, v_ptr
,
351 source_dx
>> kFractionBits
,
352 source_dx_uv
>> kFractionBits
);
356 ScaleYUVToRGB32Row_C(y_ptr
, u_ptr
, v_ptr
,
357 dest_pixel
, width
, source_dx
);
360 ScaleYUVToRGB32Row(y_ptr
, u_ptr
, v_ptr
,
361 dest_pixel
, width
, source_dx
);
365 // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms.
371 } // namespace mozilla