Bug 1669129 - [devtools] Enable devtools.overflow.debugging.enabled. r=jdescottes
[gecko.git] / gfx / ycbcr / scale_yuv_argb.cpp
blob74bbb5e60630d8d87682d70a7d462d378b2f23c7
1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * Copyright 2016 Mozilla Foundation
5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree.
12 #include "libyuv/scale.h"
14 #include <assert.h>
15 #include <string.h>
17 #include "libyuv/cpu_id.h"
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20 #include "libyuv/video_common.h"
22 #ifdef __cplusplus
23 namespace libyuv {
24 extern "C" {
25 #endif
27 // YUV to RGB conversion and scaling functions were implemented by referencing
28 // scale_argb.cc
30 // libyuv already has ScaleYUVToARGBBilinearUp(), but its implementation is not
31 // completed yet. Implementations of the functions are based on it.
32 // At first, ScaleYUVToARGBBilinearUp() was implemented by modifying the
33 // libyuv one. Then all the other functions were implemented similarly.
35 // The function relationship between yuv_convert.cpp and scale_argb.cc is
36 // as follows:
37 // - ScaleYUVToARGBDown2() <-- ScaleARGBDown2()
38 // - ScaleYUVToARGBDownEven() <-- ScaleARGBDownEven()
39 // - ScaleYUVToARGBBilinearDown() <-- ScaleARGBBilinearDown()
40 // - ScaleYUVToARGBBilinearUp() <-- ScaleARGBBilinearUp() and ScaleYUVToARGBBilinearUp() in libyuv
41 // - ScaleYUVToARGBSimple() <-- ScaleARGBSimple()
42 // - ScaleYUVToARGB() <-- ScaleARGB() // Removed some function calls for simplicity.
43 // - YUVToARGBScale() <-- ARGBScale()
45 // Calls to and selections of InterpolateRow() and ScaleARGBFilterCols() were
46 // kept as similar as possible.
48 // The following changes were made to each scaling function.
50 // -[1] Allocate YUV conversion buffer and use it as source buffer of scaling.
51 // Its usage is borrowed from the libyuv's ScaleYUVToARGBBilinearUp().
52 // -[2] Conversion from YUV to RGB was abstracted as YUVBuferIter.
53 // It is for handling multiple yuv color formats.
54 // -[3] Modified scaling functions as to handle YUV conversion buffer and
55 // use YUVBuferIter.
56 // -[4] Color conversion function selections in YUVBuferIter were borrowed from
57 // I444ToARGBMatrix(), I422ToARGBMatrix() and I420ToARGBMatrix()
// Returns the absolute value of v.
// NOTE: undefined for INT_MIN (negation overflows), like most Abs() helpers.
static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}
63 typedef mozilla::gfx::YUVColorSpace YUVColorSpace;
// Iterator over the rows of a planar YUV source image.  Holds the source
// geometry, the per-format row converter selected for this CPU, and the
// current row pointers that MoveTo/MoveToNextRow advance.
// NOTE(review): the extraction this text came from dropped blank lines and
// the struct's closing "};" — code below left byte-identical.
65 struct YUVBuferIter {
// Source dimensions, per-plane strides and plane base pointers.
66 int src_width;
67 int src_height;
68 int src_stride_y;
69 int src_stride_u;
70 int src_stride_v;
71 const uint8* src_y;
72 const uint8* src_u;
73 const uint8* src_v;
// FOURCC of the source layout (I444/I422/I420) and the YUV->RGB conversion
// matrix chosen from the color space at init time.
75 uint32 src_fourcc;
76 const struct YuvConstants* yuvconstants;
// Current absolute Y-plane row index and the row pointers derived from it.
77 int y_index;
78 const uint8* src_row_y;
79 const uint8* src_row_u;
80 const uint8* src_row_v;
// Row converter (C/SSSE3/AVX2/NEON/DSPR2 variant) selected by the Init
// functions; converts one row of Y/U/V to ARGB.
82 void (*YUVToARGBRow)(const uint8* y_buf,
83 const uint8* u_buf,
84 const uint8* v_buf,
85 uint8* rgb_buf,
86 const struct YuvConstants* yuvconstants,
87 int width);
// Format-specific row positioning hooks; I420 halves the UV row index.
88 void (*MoveTo)(YUVBuferIter& iter, int y_index);
89 void (*MoveToNextRow)(YUVBuferIter& iter);
92 void YUVBuferIter_InitI422(YUVBuferIter& iter) {
93 iter.YUVToARGBRow = I422ToARGBRow_C;
94 #if defined(HAS_I422TOARGBROW_SSSE3)
95 if (TestCpuFlag(kCpuHasSSSE3)) {
96 iter.YUVToARGBRow = I422ToARGBRow_Any_SSSE3;
97 if (IS_ALIGNED(iter.src_width, 8)) {
98 iter.YUVToARGBRow = I422ToARGBRow_SSSE3;
101 #endif
102 #if defined(HAS_I422TOARGBROW_AVX2)
103 if (TestCpuFlag(kCpuHasAVX2)) {
104 iter.YUVToARGBRow = I422ToARGBRow_Any_AVX2;
105 if (IS_ALIGNED(iter.src_width, 16)) {
106 iter.YUVToARGBRow = I422ToARGBRow_AVX2;
109 #endif
110 #if defined(HAS_I422TOARGBROW_NEON)
111 if (TestCpuFlag(kCpuHasNEON)) {
112 iter.YUVToARGBRow = I422ToARGBRow_Any_NEON;
113 if (IS_ALIGNED(iter.src_width, 8)) {
114 iter.YUVToARGBRow = I422ToARGBRow_NEON;
117 #endif
118 #if defined(HAS_I422TOARGBROW_DSPR2)
119 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(iter.src_width, 4) &&
120 IS_ALIGNED(iter.src_y, 4) && IS_ALIGNED(iter.src_stride_y, 4) &&
121 IS_ALIGNED(iter.src_u, 2) && IS_ALIGNED(iter.src_stride_u, 2) &&
122 IS_ALIGNED(iter.src_v, 2) && IS_ALIGNED(iter.src_stride_v, 2) {
123 // Always satisfy IS_ALIGNED(argb_cnv_row, 4) && IS_ALIGNED(argb_cnv_rowstride, 4)
124 iter.YUVToARGBRow = I422ToARGBRow_DSPR2;
126 #endif
129 void YUVBuferIter_InitI444(YUVBuferIter& iter) {
130 iter.YUVToARGBRow = I444ToARGBRow_C;
131 #if defined(HAS_I444TOARGBROW_SSSE3)
132 if (TestCpuFlag(kCpuHasSSSE3)) {
133 iter.YUVToARGBRow = I444ToARGBRow_Any_SSSE3;
134 if (IS_ALIGNED(iter.src_width, 8)) {
135 iter.YUVToARGBRow = I444ToARGBRow_SSSE3;
138 #endif
139 #if defined(HAS_I444TOARGBROW_AVX2)
140 if (TestCpuFlag(kCpuHasAVX2)) {
141 iter.YUVToARGBRow = I444ToARGBRow_Any_AVX2;
142 if (IS_ALIGNED(iter.src_width, 16)) {
143 iter.YUVToARGBRow = I444ToARGBRow_AVX2;
146 #endif
147 #if defined(HAS_I444TOARGBROW_NEON)
148 if (TestCpuFlag(kCpuHasNEON)) {
149 iter.YUVToARGBRow = I444ToARGBRow_Any_NEON;
150 if (IS_ALIGNED(iter.src_width, 8)) {
151 iter.YUVToARGBRow = I444ToARGBRow_NEON;
154 #endif
158 static void YUVBuferIter_MoveToForI444(YUVBuferIter& iter, int y_index) {
159 iter.y_index = y_index;
160 iter.src_row_y = iter.src_y + y_index * iter.src_stride_y;
161 iter.src_row_u = iter.src_u + y_index * iter.src_stride_u;
162 iter.src_row_v = iter.src_v + y_index * iter.src_stride_v;
165 static void YUVBuferIter_MoveToNextRowForI444(YUVBuferIter& iter) {
166 iter.src_row_y += iter.src_stride_y;
167 iter.src_row_u += iter.src_stride_u;
168 iter.src_row_v += iter.src_stride_v;
169 iter.y_index++;
172 static void YUVBuferIter_MoveToForI422(YUVBuferIter& iter, int y_index) {
173 iter.y_index = y_index;
174 iter.src_row_y = iter.src_y + y_index * iter.src_stride_y;
175 iter.src_row_u = iter.src_u + y_index * iter.src_stride_u;
176 iter.src_row_v = iter.src_v + y_index * iter.src_stride_v;
179 static void YUVBuferIter_MoveToNextRowForI422(YUVBuferIter& iter) {
180 iter.src_row_y += iter.src_stride_y;
181 iter.src_row_u += iter.src_stride_u;
182 iter.src_row_v += iter.src_stride_v;
183 iter.y_index++;
186 static void YUVBuferIter_MoveToForI420(YUVBuferIter& iter, int y_index) {
187 const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
188 int uv_y_index = y_index >> kYShift;
190 iter.y_index = y_index;
191 iter.src_row_y = iter.src_y + y_index * iter.src_stride_y;
192 iter.src_row_u = iter.src_u + uv_y_index * iter.src_stride_u;
193 iter.src_row_v = iter.src_v + uv_y_index * iter.src_stride_v;
196 static void YUVBuferIter_MoveToNextRowForI420(YUVBuferIter& iter) {
197 iter.src_row_y += iter.src_stride_y;
198 if (iter.y_index & 1) {
199 iter.src_row_u += iter.src_stride_u;
200 iter.src_row_v += iter.src_stride_v;
202 iter.y_index++;
205 static __inline void YUVBuferIter_ConvertToARGBRow(YUVBuferIter& iter, uint8* argb_row) {
206 iter.YUVToARGBRow(iter.src_row_y, iter.src_row_u, iter.src_row_v, argb_row, iter.yuvconstants, iter.src_width);
209 void YUVBuferIter_Init(YUVBuferIter& iter, uint32 src_fourcc, YUVColorSpace yuv_color_space) {
210 iter.src_fourcc = src_fourcc;
211 iter.y_index = 0;
212 iter.src_row_y = iter.src_y;
213 iter.src_row_u = iter.src_u;
214 iter.src_row_v = iter.src_v;
215 switch (yuv_color_space) {
216 case YUVColorSpace::BT2020:
217 iter.yuvconstants = &kYuv2020Constants;
218 break;
219 case YUVColorSpace::BT709:
220 iter.yuvconstants = &kYuvH709Constants;
221 break;
222 default:
223 iter.yuvconstants = &kYuvI601Constants;
226 if (src_fourcc == FOURCC_I444) {
227 YUVBuferIter_InitI444(iter);
228 iter.MoveTo = YUVBuferIter_MoveToForI444;
229 iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI444;
230 } else if(src_fourcc == FOURCC_I422){
231 YUVBuferIter_InitI422(iter);
232 iter.MoveTo = YUVBuferIter_MoveToForI422;
233 iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI422;
234 } else {
235 assert(src_fourcc == FOURCC_I420); // Should be FOURCC_I420
236 YUVBuferIter_InitI422(iter);
237 iter.MoveTo = YUVBuferIter_MoveToForI420;
238 iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI420;
242 // ScaleARGB ARGB, 1/2
243 // This is an optimized version for scaling down an ARGB to 1/2 of
244 // its original size.
// Converts source YUV rows into a 2-row ARGB ping-pong buffer, then scales
// each row (or row pair, for box filtering) down by exactly 2 horizontally.
// Mirrors libyuv's ScaleARGBDown2() with YUV conversion inserted.
// NOTE(review): extraction dropped closing-brace lines in this listing; the
// code is left byte-identical here, comments only added.
245 static void ScaleYUVToARGBDown2(int src_width, int src_height,
246 int dst_width, int dst_height,
247 int src_stride_y,
248 int src_stride_u,
249 int src_stride_v,
250 int dst_stride_argb,
251 const uint8* src_y,
252 const uint8* src_u,
253 const uint8* src_v,
254 uint8* dst_argb,
255 int x, int dx, int y, int dy,
256 enum FilterMode filtering,
257 uint32 src_fourcc,
258 YUVColorSpace yuv_color_space) {
259 int j;
261 // Allocate 2 rows of ARGB for source conversion.
262 const int kRowSize = (src_width * 4 + 15) & ~15;
263 align_buffer_64(argb_cnv_row, kRowSize * 2);
264 uint8* argb_cnv_rowptr = argb_cnv_row;
265 int argb_cnv_rowstride = kRowSize;
267 YUVBuferIter iter;
268 iter.src_width = src_width;
269 iter.src_height = src_height;
270 iter.src_stride_y = src_stride_y;
271 iter.src_stride_u = src_stride_u;
272 iter.src_stride_v = src_stride_v;
273 iter.src_y = src_y;
274 iter.src_u = src_u;
275 iter.src_v = src_v;
276 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
// Pick the C row scaler for the filter mode; may be replaced by a SIMD
// variant below.
278 void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
279 uint8* dst_argb, int dst_width) =
280 filtering == kFilterNone ? ScaleARGBRowDown2_C :
281 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
282 ScaleARGBRowDown2Box_C);
283 assert(dx == 65536 * 2); // Test scale factor of 2.
284 assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
285 // Advance to odd row, even column.
286 int yi = y >> 16;
287 iter.MoveTo(iter, yi);
288 ptrdiff_t x_offset;
289 if (filtering == kFilterBilinear) {
290 x_offset = (x >> 16) * 4;
291 } else {
292 x_offset = ((x >> 16) - 1) * 4;
294 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
295 if (TestCpuFlag(kCpuHasSSE2)) {
296 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_SSE2 :
297 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 :
298 ScaleARGBRowDown2Box_Any_SSE2);
299 if (IS_ALIGNED(dst_width, 4)) {
300 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
301 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
302 ScaleARGBRowDown2Box_SSE2);
306 #endif
307 #if defined(HAS_SCALEARGBROWDOWN2_NEON)
308 if (TestCpuFlag(kCpuHasNEON)) {
309 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_NEON :
310 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON :
311 ScaleARGBRowDown2Box_Any_NEON);
312 if (IS_ALIGNED(dst_width, 8)) {
313 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON :
314 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON :
315 ScaleARGBRowDown2Box_NEON);
318 #endif
// The 2-row buffer is ping-ponged via argb_cnv_rowptr/argb_cnv_rowstride:
// a negative stride means the "next" row currently sits before rowptr; a
// zero stride means there is no valid next row (bottom edge, or linear
// filtering which only needs one row).
320 const int dyi = dy >> 16;
321 int lastyi = yi;
322 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
323 // Prepare next row if necessary
324 if (filtering != kFilterLinear) {
325 if ((yi + dyi) < (src_height - 1)) {
326 iter.MoveTo(iter, yi + dyi);
327 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
328 } else {
329 argb_cnv_rowstride = 0;
333 if (filtering == kFilterLinear) {
334 argb_cnv_rowstride = 0;
336 const int max_yi = src_height - 1;
337 const int max_yi_minus_dyi = max_yi - dyi;
338 for (j = 0; j < dst_height; ++j) {
// Convert new source rows only when the source row index changed (clamped
// to the last row at the bottom edge).
339 if (yi != lastyi) {
340 if (yi > max_yi) {
341 yi = max_yi;
343 if (yi != lastyi) {
344 if (filtering == kFilterLinear) {
345 iter.MoveTo(iter, yi);
346 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
347 lastyi = yi;
348 } else {
349 // Prepare current row
// Reuse the already-converted "next" row when the iterator is exactly on
// it; otherwise reset the buffer and convert from scratch.
350 if (yi == iter.y_index) {
351 argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride;
352 argb_cnv_rowstride = - argb_cnv_rowstride;
353 } else {
354 iter.MoveTo(iter, yi);
355 argb_cnv_rowptr = argb_cnv_row;
356 argb_cnv_rowstride = kRowSize;
357 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
359 // Prepare next row if necessary
360 if (iter.y_index < max_yi) {
361 int next_yi = yi < max_yi_minus_dyi ? yi + dyi : max_yi;
362 iter.MoveTo(iter, next_yi);
363 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
364 } else {
365 argb_cnv_rowstride = 0;
367 lastyi = yi;
371 ScaleARGBRowDown2(argb_cnv_rowptr + x_offset, argb_cnv_rowstride, dst_argb, dst_width);
372 dst_argb += dst_stride_argb;
373 yi += dyi;
376 free_aligned_buffer_64(argb_cnv_row);
379 // ScaleARGB ARGB Even
380 // This is an optimized version for scaling down an ARGB to even
381 // multiple of its original size.
// Like ScaleYUVToARGBDown2 but for any even integer downscale: converts
// source YUV rows into a 2-row ARGB ping-pong buffer, then samples every
// col_step-th pixel (optionally box-averaged).  Mirrors ScaleARGBDownEven().
// NOTE(review): extraction dropped closing-brace lines in this listing; the
// code is left byte-identical here, comments only added.
382 static void ScaleYUVToARGBDownEven(int src_width, int src_height,
383 int dst_width, int dst_height,
384 int src_stride_y,
385 int src_stride_u,
386 int src_stride_v,
387 int dst_stride_argb,
388 const uint8* src_y,
389 const uint8* src_u,
390 const uint8* src_v,
391 uint8* dst_argb,
392 int x, int dx, int y, int dy,
393 enum FilterMode filtering,
394 uint32 src_fourcc,
395 YUVColorSpace yuv_color_space) {
396 int j;
397 // Allocate 2 rows of ARGB for source conversion.
398 const int kRowSize = (src_width * 4 + 15) & ~15;
399 align_buffer_64(argb_cnv_row, kRowSize * 2);
400 uint8* argb_cnv_rowptr = argb_cnv_row;
401 int argb_cnv_rowstride = kRowSize;
// Integer horizontal step between sampled source pixels.
403 int col_step = dx >> 16;
404 void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
405 int src_step, uint8* dst_argb, int dst_width) =
406 filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
407 assert(IS_ALIGNED(src_width, 2));
408 assert(IS_ALIGNED(src_height, 2));
409 int yi = y >> 16;
410 const ptrdiff_t x_offset = (x >> 16) * 4;
412 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
413 if (TestCpuFlag(kCpuHasSSE2)) {
414 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 :
415 ScaleARGBRowDownEven_Any_SSE2;
416 if (IS_ALIGNED(dst_width, 4)) {
417 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
418 ScaleARGBRowDownEven_SSE2;
421 #endif
422 #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
423 if (TestCpuFlag(kCpuHasNEON)) {
424 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON :
425 ScaleARGBRowDownEven_Any_NEON;
426 if (IS_ALIGNED(dst_width, 4)) {
427 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
428 ScaleARGBRowDownEven_NEON;
431 #endif
433 YUVBuferIter iter;
434 iter.src_width = src_width;
435 iter.src_height = src_height;
436 iter.src_stride_y = src_stride_y;
437 iter.src_stride_u = src_stride_u;
438 iter.src_stride_v = src_stride_v;
439 iter.src_y = src_y;
440 iter.src_u = src_u;
441 iter.src_v = src_v;
442 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
// Ping-pong buffer management: stride 0 means "no valid next row"; a
// negative stride means the next row sits before rowptr.
444 const int dyi = dy >> 16;
445 int lastyi = yi;
446 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
447 // Prepare next row if necessary
448 if (filtering != kFilterLinear) {
449 if ((yi + dyi) < (src_height - 1)) {
450 iter.MoveTo(iter, yi + dyi);
451 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
452 } else {
453 argb_cnv_rowstride = 0;
457 if (filtering == kFilterLinear) {
458 argb_cnv_rowstride = 0;
460 const int max_yi = src_height - 1;
461 const int max_yi_minus_dyi = max_yi - dyi;
462 for (j = 0; j < dst_height; ++j) {
463 if (yi != lastyi) {
464 if (yi > max_yi) {
465 yi = max_yi;
467 if (yi != lastyi) {
468 if (filtering == kFilterLinear) {
469 iter.MoveTo(iter, yi);
470 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
471 lastyi = yi;
472 } else {
473 // Prepare current row
// Reuse the previously converted "next" row when possible, otherwise
// reset the buffer and convert the current row from scratch.
474 if (yi == iter.y_index) {
475 argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride;
476 argb_cnv_rowstride = - argb_cnv_rowstride;
477 } else {
478 iter.MoveTo(iter, yi);
479 argb_cnv_rowptr = argb_cnv_row;
480 argb_cnv_rowstride = kRowSize;
481 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
483 // Prepare next row if necessary
484 if (iter.y_index < max_yi) {
485 int next_yi = yi < max_yi_minus_dyi ? yi + dyi : max_yi;
486 iter.MoveTo(iter, next_yi);
487 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
488 } else {
489 argb_cnv_rowstride = 0;
491 lastyi = yi;
495 ScaleARGBRowDownEven(argb_cnv_rowptr + x_offset, argb_cnv_rowstride, col_step, dst_argb, dst_width);
496 dst_argb += dst_stride_argb;
497 yi += dyi;
499 free_aligned_buffer_64(argb_cnv_row);
502 // Scale YUV to ARGB down with bilinear interpolation.
// Converts source YUV rows into a clipped 2-row ARGB buffer, vertically
// interpolates a pair of converted rows into a scratch row, then applies
// fractional horizontal column filtering.  Mirrors ScaleARGBBilinearDown().
// NOTE(review): extraction dropped closing-brace lines in this listing; the
// code is left byte-identical here, comments only added.
503 static void ScaleYUVToARGBBilinearDown(int src_width, int src_height,
504 int dst_width, int dst_height,
505 int src_stride_y,
506 int src_stride_u,
507 int src_stride_v,
508 int dst_stride_argb,
509 const uint8* src_y,
510 const uint8* src_u,
511 const uint8* src_v,
512 uint8* dst_argb,
513 int x, int dx, int y, int dy,
514 enum FilterMode filtering,
515 uint32 src_fourcc,
516 YUVColorSpace yuv_color_space) {
517 int j;
518 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
519 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
520 InterpolateRow_C;
521 void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
522 int dst_width, int x, int dx) =
523 (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
// Compute the horizontal source window [xl, xr) actually touched by the
// destination, 4-pixel aligned, so only that clip is interpolated.
524 int64 xlast = x + (int64)(dst_width - 1) * dx;
525 int64 xl = (dx >= 0) ? x : xlast;
526 int64 xr = (dx >= 0) ? xlast : x;
527 int clip_src_width;
528 xl = (xl >> 16) & ~3; // Left edge aligned.
529 xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
530 xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
531 if (xr > src_width) {
532 xr = src_width;
534 clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
535 const ptrdiff_t xl_offset = xl * 4;
536 x -= (int)(xl << 16);
538 // Allocate 2 row of ARGB for source conversion.
539 const int kRowSize = (src_width * 4 + 15) & ~15;
540 align_buffer_64(argb_cnv_row, kRowSize * 2);
541 uint8* argb_cnv_rowptr = argb_cnv_row;
542 int argb_cnv_rowstride = kRowSize;
544 #if defined(HAS_INTERPOLATEROW_SSSE3)
545 if (TestCpuFlag(kCpuHasSSSE3)) {
546 InterpolateRow = InterpolateRow_Any_SSSE3;
547 if (IS_ALIGNED(clip_src_width, 16)) {
548 InterpolateRow = InterpolateRow_SSSE3;
551 #endif
552 #if defined(HAS_INTERPOLATEROW_AVX2)
553 if (TestCpuFlag(kCpuHasAVX2)) {
554 InterpolateRow = InterpolateRow_Any_AVX2;
555 if (IS_ALIGNED(clip_src_width, 32)) {
556 InterpolateRow = InterpolateRow_AVX2;
559 #endif
560 #if defined(HAS_INTERPOLATEROW_NEON)
561 if (TestCpuFlag(kCpuHasNEON)) {
562 InterpolateRow = InterpolateRow_Any_NEON;
563 if (IS_ALIGNED(clip_src_width, 16)) {
564 InterpolateRow = InterpolateRow_NEON;
567 #endif
568 #if defined(HAS_INTERPOLATEROW_DSPR2)
// NOTE(review): 'src_argb' is not declared in this function — this guarded
// DSPR2 branch would not compile if HAS_INTERPOLATEROW_DSPR2 were defined.
// It presumably should test argb_cnv_row; confirm before enabling DSPR2.
569 if (TestCpuFlag(kCpuHasDSPR2) &&
570 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(argb_cnv_rowstride, 4)) {
571 InterpolateRow = InterpolateRow_Any_DSPR2;
572 if (IS_ALIGNED(clip_src_width, 4)) {
573 InterpolateRow = InterpolateRow_DSPR2;
576 #endif
577 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
578 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
579 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
581 #endif
582 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
583 if (TestCpuFlag(kCpuHasNEON)) {
584 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
585 if (IS_ALIGNED(dst_width, 4)) {
586 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
589 #endif
591 int yi = y >> 16;
593 YUVBuferIter iter;
594 iter.src_width = src_width;
595 iter.src_height = src_height;
596 iter.src_stride_y = src_stride_y;
597 iter.src_stride_u = src_stride_u;
598 iter.src_stride_v = src_stride_v;
599 iter.src_y = src_y;
600 iter.src_u = src_u;
601 iter.src_v = src_v;
602 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
603 iter.MoveTo(iter, yi);
605 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
606 // Allocate a row of ARGB.
607 align_buffer_64(row, clip_src_width * 4);
// Ping-pong buffer management as in the Down2/DownEven paths: stride 0
// means no valid next row; negative stride means next row precedes rowptr.
609 int lastyi = yi;
610 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
611 // Prepare next row if necessary
612 if (filtering != kFilterLinear) {
613 if ((yi + 1) < src_height) {
614 iter.MoveToNextRow(iter);
615 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
616 } else {
617 argb_cnv_rowstride = 0;
621 const int max_y = (src_height - 1) << 16;
622 const int max_yi = src_height - 1;
623 for (j = 0; j < dst_height; ++j) {
624 yi = y >> 16;
625 if (yi != lastyi) {
626 if (y > max_y) {
627 y = max_y;
628 yi = y >> 16;
630 if (yi != lastyi) {
631 if (filtering == kFilterLinear) {
632 iter.MoveTo(iter, yi);
633 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
634 lastyi = yi;
635 } else {
636 // Prepare current row
637 if (yi == iter.y_index) {
638 argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride;
639 argb_cnv_rowstride = - argb_cnv_rowstride;
640 } else {
641 iter.MoveTo(iter, yi);
642 argb_cnv_rowptr = argb_cnv_row;
643 argb_cnv_rowstride = kRowSize;
644 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
646 // Prepare next row if necessary
647 if (iter.y_index < max_yi) {
648 iter.MoveToNextRow(iter);
649 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
650 } else {
651 argb_cnv_rowstride = 0;
653 lastyi = yi;
// Vertically blend the row pair (yf = fractional position), then filter
// columns into the destination.
657 if (filtering == kFilterLinear) {
658 ScaleARGBFilterCols(dst_argb, argb_cnv_rowptr + xl_offset, dst_width, x, dx);
659 } else {
660 int yf = (y >> 8) & 255;
661 InterpolateRow(row, argb_cnv_rowptr + xl_offset, argb_cnv_rowstride, clip_src_width, yf);
662 ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
664 dst_argb += dst_stride_argb;
665 y += dy;
667 free_aligned_buffer_64(row);
668 free_aligned_buffer_64(argb_cnv_row);
671 // Scale YUV to ARGB up with bilinear interpolation.
// Converts one source YUV row at a time to ARGB (argb_cnv_row), immediately
// column-scales it into a 2-row destination-width ping-pong buffer, then
// vertically interpolates between the two scaled rows for each output row.
// Based on libyuv's ScaleARGBBilinearUp()/ScaleYUVToARGBBilinearUp().
// NOTE(review): extraction dropped closing-brace lines in this listing; the
// code is left byte-identical here, comments only added.
672 static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
673 int dst_width, int dst_height,
674 int src_stride_y,
675 int src_stride_u,
676 int src_stride_v,
677 int dst_stride_argb,
678 const uint8* src_y,
679 const uint8* src_u,
680 const uint8* src_v,
681 uint8* dst_argb,
682 int x, int dx, int y, int dy,
683 enum FilterMode filtering,
684 uint32 src_fourcc,
685 YUVColorSpace yuv_color_space) {
686 int j;
687 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
688 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
689 InterpolateRow_C;
690 void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
691 int dst_width, int x, int dx) =
692 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
693 const int max_y = (src_height - 1) << 16;
695 // Allocate 1 row of ARGB for source conversion.
696 align_buffer_64(argb_cnv_row, src_width * 4);
698 #if defined(HAS_INTERPOLATEROW_SSSE3)
699 if (TestCpuFlag(kCpuHasSSSE3)) {
700 InterpolateRow = InterpolateRow_Any_SSSE3;
701 if (IS_ALIGNED(dst_width, 4)) {
702 InterpolateRow = InterpolateRow_SSSE3;
705 #endif
706 #if defined(HAS_INTERPOLATEROW_AVX2)
707 if (TestCpuFlag(kCpuHasAVX2)) {
708 InterpolateRow = InterpolateRow_Any_AVX2;
709 if (IS_ALIGNED(dst_width, 8)) {
710 InterpolateRow = InterpolateRow_AVX2;
713 #endif
714 #if defined(HAS_INTERPOLATEROW_NEON)
715 if (TestCpuFlag(kCpuHasNEON)) {
716 InterpolateRow = InterpolateRow_Any_NEON;
717 if (IS_ALIGNED(dst_width, 4)) {
718 InterpolateRow = InterpolateRow_NEON;
721 #endif
722 #if defined(HAS_INTERPOLATEROW_DSPR2)
723 if (TestCpuFlag(kCpuHasDSPR2) &&
724 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
725 InterpolateRow = InterpolateRow_DSPR2;
727 #endif
// Column scaler selection: filtered vs nearest, 64-bit coords for very
// wide sources, SIMD where available, and a fast path for exact 2x up.
728 if (src_width >= 32768) {
729 ScaleARGBFilterCols = filtering ?
730 ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
732 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
733 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
734 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
736 #endif
737 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
738 if (filtering && TestCpuFlag(kCpuHasNEON)) {
739 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
740 if (IS_ALIGNED(dst_width, 4)) {
741 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
744 #endif
745 #if defined(HAS_SCALEARGBCOLS_SSE2)
746 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
747 ScaleARGBFilterCols = ScaleARGBCols_SSE2;
749 #endif
750 #if defined(HAS_SCALEARGBCOLS_NEON)
751 if (!filtering && TestCpuFlag(kCpuHasNEON)) {
752 ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
753 if (IS_ALIGNED(dst_width, 8)) {
754 ScaleARGBFilterCols = ScaleARGBCols_NEON;
757 #endif
758 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
759 ScaleARGBFilterCols = ScaleARGBColsUp2_C;
760 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
761 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
762 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
764 #endif
767 if (y > max_y) {
768 y = max_y;
771 int yi = y >> 16;
773 YUVBuferIter iter;
774 iter.src_width = src_width;
775 iter.src_height = src_height;
776 iter.src_stride_y = src_stride_y;
777 iter.src_stride_u = src_stride_u;
778 iter.src_stride_v = src_stride_v;
779 iter.src_y = src_y;
780 iter.src_u = src_u;
781 iter.src_v = src_v;
782 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
783 iter.MoveTo(iter, yi);
785 // Allocate 2 rows of ARGB.
786 const int kRowSize = (dst_width * 4 + 15) & ~15;
787 align_buffer_64(row, kRowSize * 2);
// rowptr/rowstride ping-pong over the 2 scaled rows; rowstride 0 means
// only one valid row (linear filtering or bottom edge).
789 uint8* rowptr = row;
790 int rowstride = kRowSize;
791 int lastyi = yi;
793 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
794 ScaleARGBFilterCols(rowptr, argb_cnv_row, dst_width, x, dx);
796 if (filtering == kFilterLinear) {
797 rowstride = 0;
799 // Prepare next row if necessary
800 if (filtering != kFilterLinear) {
801 if ((yi + 1) < src_height) {
802 iter.MoveToNextRow(iter);
803 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
804 ScaleARGBFilterCols(rowptr + rowstride, argb_cnv_row, dst_width, x, dx);
805 }else {
806 rowstride = 0;
810 const int max_yi = src_height - 1;
811 for (j = 0; j < dst_height; ++j) {
812 yi = y >> 16;
// When upscaling, consecutive output rows usually reuse the same source
// row pair; convert/scale a new row only when yi advances.
813 if (yi != lastyi) {
814 if (y > max_y) {
815 y = max_y;
816 yi = y >> 16;
818 if (yi != lastyi) {
819 if (filtering == kFilterLinear) {
820 iter.MoveToNextRow(iter);
821 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
822 ScaleARGBFilterCols(rowptr, argb_cnv_row, dst_width, x, dx);
823 } else {
824 // Prepare next row if necessary
825 if (yi < max_yi) {
826 iter.MoveToNextRow(iter);
827 rowptr += rowstride;
828 rowstride = -rowstride;
829 // TODO(fbarchard): Convert the clipped region of row.
830 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
831 ScaleARGBFilterCols(rowptr + rowstride, argb_cnv_row, dst_width, x, dx);
832 } else {
833 rowstride = 0;
836 lastyi = yi;
839 if (filtering == kFilterLinear) {
840 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
841 } else {
842 int yf = (y >> 8) & 255;
843 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
845 dst_argb += dst_stride_argb;
846 y += dy;
848 free_aligned_buffer_64(row);
849 free_aligned_buffer_64(argb_cnv_row);
852 // Scale ARGB to/from any dimensions, without interpolation.
853 // Fixed point math is used for performance: The upper 16 bits
854 // of x and dx is the integer part of the source position and
855 // the lower 16 bits are the fixed decimal part.
857 static void ScaleYUVToARGBSimple(int src_width, int src_height,
858 int dst_width, int dst_height,
859 int src_stride_y,
860 int src_stride_u,
861 int src_stride_v,
862 int dst_stride_argb,
863 const uint8* src_y,
864 const uint8* src_u,
865 const uint8* src_v,
866 uint8* dst_argb,
867 int x, int dx, int y, int dy,
868 uint32 src_fourcc,
869 YUVColorSpace yuv_color_space) {
870 int j;
871 void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
872 int dst_width, int x, int dx) =
873 (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
875 // Allocate 1 row of ARGB for source conversion.
876 align_buffer_64(argb_cnv_row, src_width * 4);
878 #if defined(HAS_SCALEARGBCOLS_SSE2)
879 if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
880 ScaleARGBCols = ScaleARGBCols_SSE2;
882 #endif
883 #if defined(HAS_SCALEARGBCOLS_NEON)
884 if (TestCpuFlag(kCpuHasNEON)) {
885 ScaleARGBCols = ScaleARGBCols_Any_NEON;
886 if (IS_ALIGNED(dst_width, 8)) {
887 ScaleARGBCols = ScaleARGBCols_NEON;
890 #endif
891 if (src_width * 2 == dst_width && x < 0x8000) {
892 ScaleARGBCols = ScaleARGBColsUp2_C;
893 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
894 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
895 ScaleARGBCols = ScaleARGBColsUp2_SSE2;
897 #endif
900 int yi = y >> 16;
902 YUVBuferIter iter;
903 iter.src_width = src_width;
904 iter.src_height = src_height;
905 iter.src_stride_y = src_stride_y;
906 iter.src_stride_u = src_stride_u;
907 iter.src_stride_v = src_stride_v;
908 iter.src_y = src_y;
909 iter.src_u = src_u;
910 iter.src_v = src_v;
911 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
912 iter.MoveTo(iter, yi);
914 int lasty = yi;
915 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
917 for (j = 0; j < dst_height; ++j) {
918 yi = y >> 16;
919 if (yi != lasty) {
920 iter.MoveTo(iter, yi);
921 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
922 lasty = yi;
924 ScaleARGBCols(dst_argb, argb_cnv_row, dst_width, x, dx);
925 dst_argb += dst_stride_argb;
926 y += dy;
928 free_aligned_buffer_64(argb_cnv_row);
931 static void YUVToARGBCopy(const uint8* src_y, int src_stride_y,
932 const uint8* src_u, int src_stride_u,
933 const uint8* src_v, int src_stride_v,
934 int src_width, int src_height,
935 uint8* dst_argb, int dst_stride_argb,
936 int dst_width, int dst_height,
937 uint32 src_fourcc,
938 YUVColorSpace yuv_color_space)
940 YUVBuferIter iter;
941 iter.src_width = src_width;
942 iter.src_height = src_height;
943 iter.src_stride_y = src_stride_y;
944 iter.src_stride_u = src_stride_u;
945 iter.src_stride_v = src_stride_v;
946 iter.src_y = src_y;
947 iter.src_u = src_u;
948 iter.src_v = src_v;
949 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
951 for (int j = 0; j < dst_height; ++j) {
952 YUVBuferIter_ConvertToARGBRow(iter, dst_argb);
953 iter.MoveToNextRow(iter);
954 dst_argb += dst_stride_argb;
958 static void ScaleYUVToARGB(const uint8* src_y, int src_stride_y,
959 const uint8* src_u, int src_stride_u,
960 const uint8* src_v, int src_stride_v,
961 int src_width, int src_height,
962 uint8* dst_argb, int dst_stride_argb,
963 int dst_width, int dst_height,
964 enum FilterMode filtering,
965 uint32 src_fourcc,
966 YUVColorSpace yuv_color_space)
968 // Initial source x/y coordinate and step values as 16.16 fixed point.
969 int x = 0;
970 int y = 0;
971 int dx = 0;
972 int dy = 0;
973 // ARGB does not support box filter yet, but allow the user to pass it.
974 // Simplify filtering when possible.
975 filtering = ScaleFilterReduce(src_width, src_height,
976 dst_width, dst_height,
977 filtering);
978 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
979 &x, &y, &dx, &dy);
981 // Special case for integer step values.
982 if (((dx | dy) & 0xffff) == 0) {
983 if (!dx || !dy) { // 1 pixel wide and/or tall.
984 filtering = kFilterNone;
985 } else {
986 // Optimized even scale down. ie 2, 4, 6, 8, 10x.
987 if (!(dx & 0x10000) && !(dy & 0x10000)) {
988 if (dx == 0x20000) {
989 // Optimized 1/2 downsample.
990 ScaleYUVToARGBDown2(src_width, src_height,
991 dst_width, dst_height,
992 src_stride_y,
993 src_stride_u,
994 src_stride_v,
995 dst_stride_argb,
996 src_y,
997 src_u,
998 src_v,
999 dst_argb,
1000 x, dx, y, dy,
1001 filtering,
1002 src_fourcc,
1003 yuv_color_space);
1004 return;
1006 ScaleYUVToARGBDownEven(src_width, src_height,
1007 dst_width, dst_height,
1008 src_stride_y,
1009 src_stride_u,
1010 src_stride_v,
1011 dst_stride_argb,
1012 src_y,
1013 src_u,
1014 src_v,
1015 dst_argb,
1016 x, dx, y, dy,
1017 filtering,
1018 src_fourcc,
1019 yuv_color_space);
1020 return;
1022 // Optimized odd scale down. ie 3, 5, 7, 9x.
1023 if ((dx & 0x10000) && (dy & 0x10000)) {
1024 filtering = kFilterNone;
1025 if (dx == 0x10000 && dy == 0x10000) {
1026 // Straight conversion and copy.
1027 YUVToARGBCopy(src_y, src_stride_y,
1028 src_u, src_stride_u,
1029 src_v, src_stride_v,
1030 src_width, src_height,
1031 dst_argb, dst_stride_argb,
1032 dst_width, dst_height,
1033 src_fourcc,
1034 yuv_color_space);
1035 return;
1040 if (filtering && dy < 65536) {
1041 ScaleYUVToARGBBilinearUp(src_width, src_height,
1042 dst_width, dst_height,
1043 src_stride_y,
1044 src_stride_u,
1045 src_stride_v,
1046 dst_stride_argb,
1047 src_y,
1048 src_u,
1049 src_v,
1050 dst_argb,
1051 x, dx, y, dy,
1052 filtering,
1053 src_fourcc,
1054 yuv_color_space);
1055 return;
1057 if (filtering) {
1058 ScaleYUVToARGBBilinearDown(src_width, src_height,
1059 dst_width, dst_height,
1060 src_stride_y,
1061 src_stride_u,
1062 src_stride_v,
1063 dst_stride_argb,
1064 src_y,
1065 src_u,
1066 src_v,
1067 dst_argb,
1068 x, dx, y, dy,
1069 filtering,
1070 src_fourcc,
1071 yuv_color_space);
1072 return;
1074 ScaleYUVToARGBSimple(src_width, src_height,
1075 dst_width, dst_height,
1076 src_stride_y,
1077 src_stride_u,
1078 src_stride_v,
1079 dst_stride_argb,
1080 src_y,
1081 src_u,
1082 src_v,
1083 dst_argb,
1084 x, dx, y, dy,
1085 src_fourcc,
1086 yuv_color_space);
1089 bool IsConvertSupported(uint32 src_fourcc)
1091 if (src_fourcc == FOURCC_I444 ||
1092 src_fourcc == FOURCC_I422 ||
1093 src_fourcc == FOURCC_I420) {
1094 return true;
1096 return false;
1099 LIBYUV_API
1100 int YUVToARGBScale(const uint8* src_y, int src_stride_y,
1101 const uint8* src_u, int src_stride_u,
1102 const uint8* src_v, int src_stride_v,
1103 uint32 src_fourcc,
1104 YUVColorSpace yuv_color_space,
1105 int src_width, int src_height,
1106 uint8* dst_argb, int dst_stride_argb,
1107 int dst_width, int dst_height,
1108 enum FilterMode filtering)
1110 if (!src_y || !src_u || !src_v ||
1111 src_width == 0 || src_height == 0 ||
1112 !dst_argb || dst_width <= 0 || dst_height <= 0) {
1113 return -1;
1115 if (!IsConvertSupported(src_fourcc)) {
1116 return -1;
1118 ScaleYUVToARGB(src_y, src_stride_y,
1119 src_u, src_stride_u,
1120 src_v, src_stride_v,
1121 src_width, src_height,
1122 dst_argb, dst_stride_argb,
1123 dst_width, dst_height,
1124 filtering,
1125 src_fourcc,
1126 yuv_color_space);
1127 return 0;
1130 #ifdef __cplusplus
1131 } // extern "C"
1132 } // namespace libyuv
1133 #endif