1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // This webpage shows layout of YV12 and other YUV formats
6 // http://www.fourcc.org/yuv.php
7 // The actual conversion is best described here
8 // http://en.wikipedia.org/wiki/YUV
9 // An article on optimizing YUV conversion using tables instead of multiplies
10 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf
12 // YV12 is a full plane of Y and a half height, half width chroma planes
13 // YV16 is a full plane of Y and a full height, half width chroma planes
14 // YV24 is a full plane of Y and a full height, full width chroma planes
15 // Y8 is a full plane of Y and no chroma planes (i.e., monochrome)
17 // ARGB pixel format is output, which on little endian is stored as BGRA.
18 // The alpha is set to 255, allowing the application to use RGBA or RGB32.
20 #include "yuv_convert.h"
22 #include "mozilla/StaticPrefs_gfx.h"
24 #include "scale_yuv_argb.h"
25 // Header for low level row functions.
27 #include "mozilla/SSE.h"
28 #include "mozilla/IntegerRange.h"
// 16.16 fixed point arithmetic: the low kFractionBits bits of a value hold
// the fraction, the bits above them hold the integer part.
// Number of fractional bits.
constexpr int kFractionBits = 16;
// The value 1.0 expressed in fixed point.
constexpr int kFractionMax = 1 << kFractionBits;
// Mask that isolates the fractional bits of a fixed point value.
constexpr int kFractionMask = kFractionMax - 1;
41 libyuv::FourCC
FourCCFromYUVType(YUVType aYUVType
) {
43 case YV24
: return libyuv::FOURCC_I444
;
44 case YV16
: return libyuv::FOURCC_I422
;
45 case YV12
: return libyuv::FOURCC_I420
;
46 case Y8
: return libyuv::FOURCC_I400
;
47 default: return libyuv::FOURCC_ANY
;
// Interleaves three planar 8-bit channels into 4-byte pixels.
//
// For every pixel (row, col) of the pic_width x pic_height picture the four
// output bytes are, in memory order: src_u, src_y, src_v, 255. With GBR
// planar input carried in the Y/U/V plane slots this yields B, G, R, A in
// memory, i.e. ARGB as stored on little endian.
//
// Each *_pitch is the byte distance between the starts of consecutive rows
// of the corresponding buffer. A non-positive pic_width or pic_height
// converts nothing. Always returns 0.
int GBRPlanarToARGB(const uint8_t* src_y, int y_pitch,
                    const uint8_t* src_u, int u_pitch,
                    const uint8_t* src_v, int v_pitch,
                    uint8_t* rgb_buf, int rgb_pitch,
                    int pic_width, int pic_height) {
  // libyuv has no native conversion function for this.
  for (int row = 0; row < pic_height; ++row) {
    // Row bases are computed once per row, in pointer-sized arithmetic so
    // that pitch * row cannot overflow int on very large frames.
    const intptr_t r = row;
    const uint8_t* y_row = src_y + r * y_pitch;
    const uint8_t* u_row = src_u + r * u_pitch;
    const uint8_t* v_row = src_v + r * v_pitch;
    uint8_t* out = rgb_buf + r * rgb_pitch;
    for (int col = 0; col < pic_width; ++col) {
      out[0] = u_row[col];
      out[1] = y_row[col];
      out[2] = v_row[col];
      out[3] = 255;  // Opaque alpha.
      out += 4;
    }
  }
  return 0;
}
69 // Convert a frame of YUV to 32 bit ARGB.
//
// Chooses between two back ends. ConvertYCbCrToRGB32_deprecated() is used
// when the gfx "ycbcr accurate conversion" pref is set, or when the CPU
// reports MMX and SSE but not SSE3 and the input is BT601 limited range; it
// is never used for a color space other than BT601. Everything else goes
// through libyuv with a YuvConstants table picked from yuv_color_space and
// color_range.
70 void ConvertYCbCrToRGB32(const uint8_t* y_buf
, const uint8_t* u_buf
,
71 const uint8_t* v_buf
, uint8_t* rgb_buf
, int pic_x
,
72 int pic_y
, int pic_width
, int pic_height
, int y_pitch
,
73 int uv_pitch
, int rgb_pitch
, YUVType yuv_type
,
74 YUVColorSpace yuv_color_space
,
75 ColorRange color_range
) {
76 // The deprecated function's conversion is accurate.
77 // The libyuv conversion is a bit inaccurate in exchange for performance. It
78 // calculates RGB from YUV dynamically so that it can use SIMD. It stores the
79 // conversion coefficients as signed bytes, but one coefficient needs to be
80 // 129, so libyuv clamps 129 to 127. Also, only 6 bits are used for the
// fractional part during the dynamic calculation.
82 // The deprecated function is still fast on some old Intel chips.
84 bool use_deprecated
= StaticPrefs::gfx_ycbcr_accurate_conversion() ||
85 (supports_mmx() && supports_sse() && !supports_sse3() &&
86 yuv_color_space
== YUVColorSpace::BT601
&&
87 color_range
== ColorRange::LIMITED
);
88 // The deprecated function only supports BT601.
90 if (yuv_color_space
!= YUVColorSpace::BT601
) {
91 use_deprecated
= false;
94 ConvertYCbCrToRGB32_deprecated(y_buf
, u_buf
, v_buf
, rgb_buf
, pic_x
, pic_y
,
95 pic_width
, pic_height
, y_pitch
, uv_pitch
,
// libyuv path: the conversion routine, the three plane pointers and the
// constants table are filled in below before the final call.
100 decltype(libyuv::I420ToARGBMatrix
)* fConvertYUVToARGB
= nullptr;
101 const uint8_t* src_y
= nullptr;
102 const uint8_t* src_u
= nullptr;
103 const uint8_t* src_v
= nullptr;
104 const libyuv::YuvConstants
* yuv_constant
= nullptr;
// Select the libyuv constants table; within each color space the LIMITED
// range picks the first table and any other range the second.
106 switch (yuv_color_space
) {
107 case YUVColorSpace::BT2020
:
108 yuv_constant
= color_range
== ColorRange::LIMITED
109 ? &libyuv::kYuv2020Constants
110 : &libyuv::kYuvV2020Constants
;
112 case YUVColorSpace::BT709
:
113 yuv_constant
= color_range
== ColorRange::LIMITED
114 ? &libyuv::kYuvH709Constants
115 : &libyuv::kYuvF709Constants
;
117 case YUVColorSpace::Identity
:
118 MOZ_ASSERT(yuv_type
== YV24
, "Identity (aka RGB) with chroma subsampling is unsupported");
119 if (yuv_type
== YV24
) {
122 [[fallthrough
]]; // Assuming BT601 for unsupported input is better than crashing
124 MOZ_FALLTHROUGH_ASSERT("Unsupported YUVColorSpace");
125 case YUVColorSpace::BT601
:
126 yuv_constant
= color_range
== ColorRange::LIMITED
127 ? &libyuv::kYuvI601Constants
128 : &libyuv::kYuvJPEGConstants
;
// Full-resolution chroma: all three planes are offset by whole rows of
// pic_y plus pic_x. These pointers feed GBRPlanarToARGB (Identity) or
// I444ToARGBMatrix.
134 src_y
= y_buf
+ y_pitch
* pic_y
+ pic_x
;
135 src_u
= u_buf
+ uv_pitch
* pic_y
+ pic_x
;
136 src_v
= v_buf
+ uv_pitch
* pic_y
+ pic_x
;
138 if (yuv_color_space
== YUVColorSpace::Identity
) {
139 // Special case for RGB image
141 GBRPlanarToARGB(src_y
, y_pitch
, src_u
, uv_pitch
, src_v
, uv_pitch
,
142 rgb_buf
, rgb_pitch
, pic_width
, pic_height
);
147 fConvertYUVToARGB
= libyuv::I444ToARGBMatrix
;
// Chroma at half horizontal offset (pic_x / 2) with a full row offset;
// these pointers feed I422ToARGBMatrix.
151 src_y
= y_buf
+ y_pitch
* pic_y
+ pic_x
;
152 src_u
= u_buf
+ uv_pitch
* pic_y
+ pic_x
/ 2;
153 src_v
= v_buf
+ uv_pitch
* pic_y
+ pic_x
/ 2;
155 fConvertYUVToARGB
= libyuv::I422ToARGBMatrix
;
// Chroma offset halved as a whole; these pointers feed I420ToARGBMatrix.
// NOTE(review): (uv_pitch * pic_y + pic_x) / 2 equals
// uv_pitch * (pic_y / 2) + pic_x / 2 only when pic_y is even; for an odd
// pic_y it lands half a chroma row further in. Confirm callers only pass
// even pic_y here.
159 src_y
= y_buf
+ y_pitch
* pic_y
+ pic_x
;
160 src_u
= u_buf
+ (uv_pitch
* pic_y
+ pic_x
) / 2;
161 src_v
= v_buf
+ (uv_pitch
* pic_y
+ pic_x
) / 2;
163 fConvertYUVToARGB
= libyuv::I420ToARGBMatrix
;
// Luma-only input: no chroma planes are expected, and libyuv's I400/J400
// converters are called directly instead of a *ToARGBMatrix routine.
167 src_y
= y_buf
+ y_pitch
* pic_y
+ pic_x
;
168 MOZ_ASSERT(u_buf
== nullptr);
169 MOZ_ASSERT(v_buf
== nullptr);
171 if (color_range
== ColorRange::LIMITED
) {
173 libyuv::I400ToARGB(src_y
, y_pitch
, rgb_buf
, rgb_pitch
, pic_width
,
178 libyuv::J400ToARGB(src_y
, y_pitch
, rgb_buf
, rgb_pitch
, pic_width
,
186 MOZ_ASSERT_UNREACHABLE("Unsupported YUV type");
// Run the selected libyuv routine; both chroma planes share uv_pitch.
190 fConvertYUVToARGB(src_y
, y_pitch
, src_u
, uv_pitch
, src_v
, uv_pitch
,
191 rgb_buf
, rgb_pitch
, yuv_constant
, pic_width
, pic_height
);
195 // Convert a frame of YUV to 32 bit ARGB.
//
// Legacy row-by-row converter. Each output row y in
// [pic_y, pic_y + pic_height) is produced from the luma row at y and the
// chroma row at y >> y_shift, starting at column pic_x (pic_x >> x_shift for
// chroma). One of the FastConvertYUVToRGB32Row variants is called per row.
196 void ConvertYCbCrToRGB32_deprecated(const uint8_t* y_buf
,
197 const uint8_t* u_buf
,
198 const uint8_t* v_buf
,
// Chroma is vertically subsampled (shift 1) only for YV12 ...
208 unsigned int y_shift
= yuv_type
== YV12
? 1 : 0;
// ... and horizontally subsampled (shift 1) for everything except YV24.
209 unsigned int x_shift
= yuv_type
== YV24
? 0 : 1;
210 // Test for SSE because the optimized code uses movntq, which is not part of MMX.
211 bool has_sse
= supports_mmx() && supports_sse();
212 // There is no optimized YV24 SSE routine so we check for this and
213 // fall back to the C code.
214 has_sse
&= yuv_type
!= YV24
;
// With subsampled chroma an odd pic_x starts in the middle of a chroma
// sample pair; one pixel is then peeled off, leaving x_width for the rest.
215 bool odd_pic_x
= yuv_type
!= YV24
&& pic_x
% 2 != 0;
216 int x_width
= odd_pic_x
? pic_width
- 1 : pic_width
;
218 for (int y
= pic_y
; y
< pic_height
+ pic_y
; ++y
) {
// Destination rows are numbered from 0; source rows from pic_y.
219 uint8_t* rgb_row
= rgb_buf
+ (y
- pic_y
) * rgb_pitch
;
220 const uint8_t* y_ptr
= y_buf
+ y
* y_pitch
+ pic_x
;
221 const uint8_t* u_ptr
= u_buf
+ (y
>> y_shift
) * uv_pitch
+ (pic_x
>> x_shift
);
222 const uint8_t* v_ptr
= v_buf
+ (y
>> y_shift
) * uv_pitch
+ (pic_x
>> x_shift
);
225 // Handle the single odd pixel manually and use the
226 // fast routines for the remaining.
227 FastConvertYUVToRGB32Row_C(y_ptr
++,
237 FastConvertYUVToRGB32Row(y_ptr
,
244 FastConvertYUVToRGB32Row_C(y_ptr
,
253 // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
258 // C version does 8 at a time to mimic MMX code
//
// Blends two source rows into ybuf. Every output byte is
//   (y0 * (256 - source_y_fraction) + y1 * source_y_fraction) >> 8
// so source_y_fraction is a weight out of 256 for the second row.
// Eight bytes are written per pass and passes repeat while ybuf < end, where
// end is the initial ybuf + source_width.
// NOTE(review): this implies output is produced in groups of 8 and may run
// past source_width; presumably callers pad their buffers - confirm.
259 static void FilterRows_C(uint8_t* ybuf
, const uint8_t* y0_ptr
, const uint8_t* y1_ptr
,
260 int source_width
, int source_y_fraction
) {
// Weights of the second and first row; they always sum to 256.
261 int y1_fraction
= source_y_fraction
;
262 int y0_fraction
= 256 - y1_fraction
;
263 uint8_t* end
= ybuf
+ source_width
;
265 ybuf
[0] = (y0_ptr
[0] * y0_fraction
+ y1_ptr
[0] * y1_fraction
) >> 8;
266 ybuf
[1] = (y0_ptr
[1] * y0_fraction
+ y1_ptr
[1] * y1_fraction
) >> 8;
267 ybuf
[2] = (y0_ptr
[2] * y0_fraction
+ y1_ptr
[2] * y1_fraction
) >> 8;
268 ybuf
[3] = (y0_ptr
[3] * y0_fraction
+ y1_ptr
[3] * y1_fraction
) >> 8;
269 ybuf
[4] = (y0_ptr
[4] * y0_fraction
+ y1_ptr
[4] * y1_fraction
) >> 8;
270 ybuf
[5] = (y0_ptr
[5] * y0_fraction
+ y1_ptr
[5] * y1_fraction
) >> 8;
271 ybuf
[6] = (y0_ptr
[6] * y0_fraction
+ y1_ptr
[6] * y1_fraction
) >> 8;
272 ybuf
[7] = (y0_ptr
[7] * y0_fraction
+ y1_ptr
[7] * y1_fraction
) >> 8;
276 } while (ybuf
< end
);
// Declarations of the SIMD row filters. They take the same arguments as
// FilterRows_C; their definitions are not part of this section. Each is only
// declared when the matching MOZILLA_MAY_SUPPORT_* macro is defined.
279 #ifdef MOZILLA_MAY_SUPPORT_MMX
280 void FilterRows_MMX(uint8_t* ybuf
, const uint8_t* y0_ptr
, const uint8_t* y1_ptr
,
281 int source_width
, int source_y_fraction
);
284 #ifdef MOZILLA_MAY_SUPPORT_SSE2
285 void FilterRows_SSE2(uint8_t* ybuf
, const uint8_t* y0_ptr
, const uint8_t* y1_ptr
,
286 int source_width
, int source_y_fraction
);
// Blends two rows into ybuf using the best row filter available at run time.
// The candidates are tried in this order: SSE2 (if compiled in and
// mozilla::supports_sse2()), MMX (if compiled in and
// mozilla::supports_mmx()), then the portable FilterRows_C. All three
// receive the arguments unchanged.
289 static inline void FilterRows(uint8_t* ybuf
, const uint8_t* y0_ptr
,
290 const uint8_t* y1_ptr
, int source_width
,
291 int source_y_fraction
) {
292 #ifdef MOZILLA_MAY_SUPPORT_SSE2
293 if (mozilla::supports_sse2()) {
294 FilterRows_SSE2(ybuf
, y0_ptr
, y1_ptr
, source_width
, source_y_fraction
);
299 #ifdef MOZILLA_MAY_SUPPORT_MMX
300 if (mozilla::supports_mmx()) {
301 FilterRows_MMX(ybuf
, y0_ptr
, y1_ptr
, source_width
, source_y_fraction
);
// Portable implementation.
306 FilterRows_C(ybuf
, y0_ptr
, y1_ptr
, source_width
, source_y_fraction
);
310 // Scale a frame of YUV to 32 bit ARGB.
//
// Chooses between ScaleYCbCrToRGB32_deprecated() and libyuv's
// YUVToARGBScale(). The deprecated scaler is used when the gfx "ycbcr
// accurate conversion" pref is set or when the CPU reports MMX and SSE but
// not SSE3 (a Windows x64 specific condition also participates, see below);
// it is never used for a color space other than BT601.
311 void ScaleYCbCrToRGB32(const uint8_t* y_buf
,
312 const uint8_t* u_buf
,
313 const uint8_t* v_buf
,
323 YUVColorSpace yuv_color_space
,
324 ScaleFilter filter
) {
325 bool use_deprecated
=
326 StaticPrefs::gfx_ycbcr_accurate_conversion() ||
327 #if defined(XP_WIN) && defined(_M_X64)
328 // libyuv does not support SIMD scaling on win 64bit. See Bug 1295927.
331 (supports_mmx() && supports_sse() && !supports_sse3());
332 // The deprecated function only supports BT601.
334 if (yuv_color_space
!= YUVColorSpace::BT601
) {
335 use_deprecated
= false;
337 if (use_deprecated
) {
338 ScaleYCbCrToRGB32_deprecated(y_buf
, u_buf
, v_buf
,
340 source_width
, source_height
,
// libyuv path: the plane layout is communicated as a FourCC derived from
// yuv_type, and the filter mode passed is libyuv::kFilterBilinear.
351 libyuv::YUVToARGBScale(y_buf
, y_pitch
,
354 FourCCFromYUVType(yuv_type
),
356 source_width
, source_height
,
359 libyuv::kFilterBilinear
);
364 // Scale a frame of YUV to 32 bit ARGB.
//
// Legacy scaler. For every destination row it locates the source row in
// 16.16 fixed point, optionally blends it with the next source row into
// temporary Y/U/V line buffers (vertical bilinear filter), and then converts
// and horizontally scales that line into the destination with one of the
// *YUVToRGB32Row routines. Rotation and mirroring are implemented by moving
// the plane pointers to the starting corner and negating the source extents.
365 void ScaleYCbCrToRGB32_deprecated(const uint8_t* y_buf
,
366 const uint8_t* u_buf
,
367 const uint8_t* v_buf
,
378 ScaleFilter filter
) {
379 bool has_mmx
= supports_mmx();
381 // 4096 allows 3 buffers to fit in 12k.
382 // Helps performance on CPU with 16K L1 cache.
383 // Large enough for 3840x2160 and 30" displays which are 2560x1600.
384 const int kFilterBufferSize
= 4096;
385 // Disable filtering if the screen is too big (to avoid buffer overflows).
386 // This should never happen to regular users: they don't have monitors
387 // wider than 4096 pixels.
388 // TODO(fbarchard): Allow rotated videos to filter.
// Any non-zero view_rotate also disables filtering, so the line buffers
// below are only used for unrotated input.
389 if (source_width
> kFilterBufferSize
|| view_rotate
)
390 filter
= FILTER_NONE
;
// Chroma is vertically subsampled (shift 1) only for YV12.
392 unsigned int y_shift
= yuv_type
== YV12
? 1 : 0;
393 // Diagram showing origin and direction of source sampling.
399 // Rotations that start at right side of image.
400 if ((view_rotate
== ROTATE_180
) ||
401 (view_rotate
== ROTATE_270
) ||
402 (view_rotate
== MIRROR_ROTATE_0
) ||
403 (view_rotate
== MIRROR_ROTATE_90
)) {
// Start at the last column and walk left: the width becomes negative.
404 y_buf
+= source_width
- 1;
405 u_buf
+= source_width
/ 2 - 1;
406 v_buf
+= source_width
/ 2 - 1;
407 source_width
= -source_width
;
409 // Rotations that start at bottom of image.
410 if ((view_rotate
== ROTATE_90
) ||
411 (view_rotate
== ROTATE_180
) ||
412 (view_rotate
== MIRROR_ROTATE_90
) ||
413 (view_rotate
== MIRROR_ROTATE_180
)) {
// Start at the last row and walk up: the height becomes negative.
414 y_buf
+= (source_height
- 1) * y_pitch
;
415 u_buf
+= ((source_height
>> y_shift
) - 1) * uv_pitch
;
416 v_buf
+= ((source_height
>> y_shift
) - 1) * uv_pitch
;
417 source_height
= -source_height
;
420 // Handle zero sized destination.
421 if (width
== 0 || height
== 0)
// Source step per destination pixel, in 16.16 fixed point.
423 int source_dx
= source_width
* kFractionMax
/ width
;
424 int source_dy
= source_height
* kFractionMax
/ height
;
425 int source_dx_uv
= source_dx
;
427 if ((view_rotate
== ROTATE_90
) ||
428 (view_rotate
== ROTATE_270
)) {
433 source_height
= source_width
;
// For quarter turns a horizontal destination step moves by whole source
// rows: the integer part of the old dy times the pitch, re-expressed in
// fixed point. Luma and chroma get separate steps because their pitches
// differ.
435 int original_dx
= source_dx
;
436 int original_dy
= source_dy
;
437 source_dx
= ((original_dy
>> kFractionBits
) * y_pitch
) << kFractionBits
;
438 source_dx_uv
= ((original_dy
>> kFractionBits
) * uv_pitch
) << kFractionBits
;
439 source_dy
= original_dx
;
440 if (view_rotate
== ROTATE_90
) {
443 source_height
= -source_height
;
450 // Need padding because FilterRows() will write 1 to 16 extra pixels
451 // after the end for SSE2 version.
// One stack array holds the three line buffers back to back; the pointer
// computed from it is rounded up to a 16-byte boundary.
452 uint8_t yuvbuf
[16 + kFilterBufferSize
* 3 + 16];
454 reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(yuvbuf
+ 15) & ~15);
455 uint8_t* ubuf
= ybuf
+ kFilterBufferSize
;
456 uint8_t* vbuf
= ubuf
+ kFilterBufferSize
;
457 // TODO(fbarchard): Fixed point math is off by 1 on negatives.
458 int yscale_fixed
= (source_height
<< kFractionBits
) / height
;
460 // TODO(fbarchard): Split this into separate function for better efficiency.
461 for (int y
= 0; y
< height
; ++y
) {
462 uint8_t* dest_pixel
= rgb_buf
+ y
* rgb_pitch
;
463 int source_y_subpixel
= (y
* yscale_fixed
);
// Only when shrinking to half height or less (scale factor >= 2.0).
464 if (yscale_fixed
>= (kFractionMax
* 2)) {
465 source_y_subpixel
+= kFractionMax
/ 2; // For 1/2 or less, center filter.
467 int source_y
= source_y_subpixel
>> kFractionBits
;
// Row pairs (current, next) for each plane; the "1" pointers are only
// passed to FilterRows() after the bounds checks further down.
469 const uint8_t* y0_ptr
= y_buf
+ source_y
* y_pitch
;
470 const uint8_t* y1_ptr
= y0_ptr
+ y_pitch
;
472 const uint8_t* u0_ptr
= u_buf
+ (source_y
>> y_shift
) * uv_pitch
;
473 const uint8_t* u1_ptr
= u0_ptr
+ uv_pitch
;
474 const uint8_t* v0_ptr
= v_buf
+ (source_y
>> y_shift
) * uv_pitch
;
475 const uint8_t* v1_ptr
= v0_ptr
+ uv_pitch
;
477 // vertical scaler uses 16.8 fixed point
// i.e. the 16-bit fraction is reduced to its top 8 bits (0..255), which is
// the weight FilterRows() expects.
478 int source_y_fraction
= (source_y_subpixel
& kFractionMask
) >> 8;
479 int source_uv_fraction
=
480 ((source_y_subpixel
>> y_shift
) & kFractionMask
) >> 8;
482 const uint8_t* y_ptr
= y0_ptr
;
483 const uint8_t* u_ptr
= u0_ptr
;
484 const uint8_t* v_ptr
= v0_ptr
;
485 // Apply vertical filtering if necessary.
486 // TODO(fbarchard): Remove memcpy when not necessary.
487 if (filter
& mozilla::gfx::FILTER_BILINEAR_V
) {
// Blend only when actually scaling vertically, the row position has a
// fractional part, and a following source row exists; otherwise the row is
// copied as is.
488 if (yscale_fixed
!= kFractionMax
&&
489 source_y_fraction
&& ((source_y
+ 1) < source_height
)) {
490 FilterRows(ybuf
, y0_ptr
, y1_ptr
, source_width
, source_y_fraction
);
492 memcpy(ybuf
, y0_ptr
, source_width
);
// Duplicate the last pixel one past the end of the line.
// NOTE(review): when source_width == kFilterBufferSize this index is
// ubuf[0], which the chroma pass below writes afterwards - confirm whether
// that edge case matters.
495 ybuf
[source_width
] = ybuf
[source_width
-1];
// Chroma line length, rounded up for odd widths.
496 int uv_source_width
= (source_width
+ 1) / 2;
497 if (yscale_fixed
!= kFractionMax
&&
498 source_uv_fraction
&&
499 (((source_y
>> y_shift
) + 1) < (source_height
>> y_shift
))) {
500 FilterRows(ubuf
, u0_ptr
, u1_ptr
, uv_source_width
, source_uv_fraction
);
501 FilterRows(vbuf
, v0_ptr
, v1_ptr
, uv_source_width
, source_uv_fraction
);
503 memcpy(ubuf
, u0_ptr
, uv_source_width
);
504 memcpy(vbuf
, v0_ptr
, uv_source_width
);
508 ubuf
[uv_source_width
] = ubuf
[uv_source_width
- 1];
509 vbuf
[uv_source_width
] = vbuf
[uv_source_width
- 1];
// Horizontal stage: pick the row routine from the horizontal step.
511 if (source_dx
== kFractionMax
) { // Not scaled
512 FastConvertYUVToRGB32Row(y_ptr
, u_ptr
, v_ptr
,
514 } else if (filter
& FILTER_BILINEAR_H
) {
515 LinearScaleYUVToRGB32Row(y_ptr
, u_ptr
, v_ptr
,
516 dest_pixel
, width
, source_dx
);
518 // Specialized scalers and rotation.
519 #if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_MSC_VER) && defined(_M_IX86) && !defined(__clang__)
520 if(mozilla::supports_sse()) {
// Exact 2x horizontal enlargement.
521 if (width
== (source_width
* 2)) {
522 DoubleYUVToRGB32Row_SSE(y_ptr
, u_ptr
, v_ptr
,
524 } else if ((source_dx
& kFractionMask
) == 0) {
525 // Scaling by integer scale factor. ie half.
526 ConvertYUVToRGB32Row_SSE(y_ptr
, u_ptr
, v_ptr
,
528 source_dx
>> kFractionBits
);
529 } else if (source_dx_uv
== source_dx
) { // Not rotated.
530 ScaleYUVToRGB32Row(y_ptr
, u_ptr
, v_ptr
,
531 dest_pixel
, width
, source_dx
);
// Rotated: luma and chroma advance by different whole-row steps.
533 RotateConvertYUVToRGB32Row_SSE(y_ptr
, u_ptr
, v_ptr
,
535 source_dx
>> kFractionBits
,
536 source_dx_uv
>> kFractionBits
);
540 ScaleYUVToRGB32Row_C(y_ptr
, u_ptr
, v_ptr
,
541 dest_pixel
, width
, source_dx
);
545 ScaleYUVToRGB32Row(y_ptr
, u_ptr
, v_ptr
,
546 dest_pixel
, width
, source_dx
);
550 // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms.
// Converts an I420 frame with a separate alpha plane (a_buf) to 32 bit ARGB
// by forwarding to libyuv::I420AlphaToARGB. The libyuv status code is kept
// in a DebugOnly variable.
554 void ConvertI420AlphaToARGB32(const uint8_t* y_buf
,
555 const uint8_t* u_buf
,
556 const uint8_t* v_buf
,
557 const uint8_t* a_buf
,
565 // The downstream graphics stack expects an attenuated input, which is why
566 // the attenuation parameter (the final argument, 1) is set.
567 DebugOnly
<int> err
= libyuv::I420AlphaToARGB(y_buf
, ya_pitch
,
571 argb_buf
, argb_pitch
,
572 pic_width
, pic_height
, 1);
577 } // namespace mozilla