Merge mozilla-central to autoland. CLOSED TREE
[gecko.git] / gfx / ycbcr / scale_yuv_argb.cpp
blob2a103fb61ea2461e1e03f6948bfba56d959c50e7
1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * Copyright 2016 Mozilla Foundation
5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree.
12 #include "libyuv/scale.h"
14 #include <assert.h>
15 #include <string.h>
17 #include "libyuv/convert_argb.h"
18 #include "libyuv/cpu_id.h"
19 #include "libyuv/row.h"
20 #include "libyuv/scale_row.h"
21 #include "libyuv/video_common.h"
23 #include "mozilla/gfx/Types.h"
25 #ifdef __cplusplus
26 namespace libyuv {
27 extern "C" {
28 #endif
30 // YUV to RGB conversion and scaling functions were implemented by referencing
31 // scale_argb.cc
33 // libyuv already has ScaleYUVToARGBBilinearUp(), but its implementation is not
34 // completed yet. Implementations of the functions are based on it.
35 // At first, ScaleYUVToARGBBilinearUp() was implemented by modifying the
36 // libyuv's one. Then all the other functions were implemented similarly.
38 // Function relationship between yuv_convert.cpp and scale_argb.cc are like
39 // the followings
40 // - ScaleYUVToARGBDown2() <-- ScaleARGBDown2()
41 // - ScaleYUVToARGBDownEven() <-- ScaleARGBDownEven()
42 // - ScaleYUVToARGBBilinearDown() <-- ScaleARGBBilinearDown()
43 // - ScaleYUVToARGBBilinearUp() <-- ScaleARGBBilinearUp() and ScaleYUVToARGBBilinearUp() in libyuv
44 // - ScaleYUVToARGBSimple() <-- ScaleARGBSimple()
45 // - ScaleYUVToARGB() <-- ScaleARGB() // Removed some function calls for simplicity.
46 // - YUVToARGBScale() <-- ARGBScale()
48 // Callings and selections of InterpolateRow() and ScaleARGBFilterCols() were
49 // kept as same as possible.
51 // The following changes were made to each scaling function.
53 // -[1] Allocate YUV conversion buffer and use it as source buffer of scaling.
54 // Its usage is borrowed from the libyuv's ScaleYUVToARGBBilinearUp().
55 // -[2] Conversion from YUV to RGB was abstracted as YUVBuferIter.
56 // It is for handling multiple yuv color formats.
57 // -[3] Modified scaling functions as to handle YUV conversion buffer and
58 // use YUVBuferIter.
59 // -[4] Color conversion function selections in YUVBuferIter were borrowed from
60 // I444ToARGBMatrix(), I422ToARGBMatrix() and I420ToARGBMatrix()
62 typedef mozilla::gfx::YUVColorSpace YUVColorSpace;
64 struct YUVBuferIter {
65 int src_width;
66 int src_height;
67 int src_stride_y;
68 int src_stride_u;
69 int src_stride_v;
70 const uint8_t* src_y;
71 const uint8_t* src_u;
72 const uint8_t* src_v;
74 uint32_t src_fourcc;
75 const struct YuvConstants* yuvconstants;
76 int y_index;
77 const uint8_t* src_row_y;
78 const uint8_t* src_row_u;
79 const uint8_t* src_row_v;
81 void (*YUVToARGBRow)(const uint8_t* y_buf,
82 const uint8_t* u_buf,
83 const uint8_t* v_buf,
84 uint8_t* rgb_buf,
85 const struct YuvConstants* yuvconstants,
86 int width);
87 void (*MoveTo)(YUVBuferIter& iter, int y_index);
88 void (*MoveToNextRow)(YUVBuferIter& iter);
91 void YUVBuferIter_InitI422(YUVBuferIter& iter) {
92 iter.YUVToARGBRow = I422ToARGBRow_C;
93 #if defined(HAS_I422TOARGBROW_SSSE3)
94 if (TestCpuFlag(kCpuHasSSSE3)) {
95 iter.YUVToARGBRow = I422ToARGBRow_Any_SSSE3;
96 if (IS_ALIGNED(iter.src_width, 8)) {
97 iter.YUVToARGBRow = I422ToARGBRow_SSSE3;
100 #endif
101 #if defined(HAS_I422TOARGBROW_AVX2)
102 if (TestCpuFlag(kCpuHasAVX2)) {
103 iter.YUVToARGBRow = I422ToARGBRow_Any_AVX2;
104 if (IS_ALIGNED(iter.src_width, 16)) {
105 iter.YUVToARGBRow = I422ToARGBRow_AVX2;
108 #endif
109 #if defined(HAS_I422TOARGBROW_NEON)
110 if (TestCpuFlag(kCpuHasNEON)) {
111 iter.YUVToARGBRow = I422ToARGBRow_Any_NEON;
112 if (IS_ALIGNED(iter.src_width, 8)) {
113 iter.YUVToARGBRow = I422ToARGBRow_NEON;
116 #endif
117 #if defined(HAS_I422TOARGBROW_DSPR2)
118 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(iter.src_width, 4) &&
119 IS_ALIGNED(iter.src_y, 4) && IS_ALIGNED(iter.src_stride_y, 4) &&
120 IS_ALIGNED(iter.src_u, 2) && IS_ALIGNED(iter.src_stride_u, 2) &&
121 IS_ALIGNED(iter.src_v, 2) && IS_ALIGNED(iter.src_stride_v, 2) {
122 // Always satisfy IS_ALIGNED(argb_cnv_row, 4) && IS_ALIGNED(argb_cnv_rowstride, 4)
123 iter.YUVToARGBRow = I422ToARGBRow_DSPR2;
125 #endif
128 void YUVBuferIter_InitI444(YUVBuferIter& iter) {
129 iter.YUVToARGBRow = I444ToARGBRow_C;
130 #if defined(HAS_I444TOARGBROW_SSSE3)
131 if (TestCpuFlag(kCpuHasSSSE3)) {
132 iter.YUVToARGBRow = I444ToARGBRow_Any_SSSE3;
133 if (IS_ALIGNED(iter.src_width, 8)) {
134 iter.YUVToARGBRow = I444ToARGBRow_SSSE3;
137 #endif
138 #if defined(HAS_I444TOARGBROW_AVX2)
139 if (TestCpuFlag(kCpuHasAVX2)) {
140 iter.YUVToARGBRow = I444ToARGBRow_Any_AVX2;
141 if (IS_ALIGNED(iter.src_width, 16)) {
142 iter.YUVToARGBRow = I444ToARGBRow_AVX2;
145 #endif
146 #if defined(HAS_I444TOARGBROW_NEON)
147 if (TestCpuFlag(kCpuHasNEON)) {
148 iter.YUVToARGBRow = I444ToARGBRow_Any_NEON;
149 if (IS_ALIGNED(iter.src_width, 8)) {
150 iter.YUVToARGBRow = I444ToARGBRow_NEON;
153 #endif
157 static void YUVBuferIter_MoveToForI444(YUVBuferIter& iter, int y_index) {
158 iter.y_index = y_index;
159 iter.src_row_y = iter.src_y + y_index * iter.src_stride_y;
160 iter.src_row_u = iter.src_u + y_index * iter.src_stride_u;
161 iter.src_row_v = iter.src_v + y_index * iter.src_stride_v;
164 static void YUVBuferIter_MoveToNextRowForI444(YUVBuferIter& iter) {
165 iter.src_row_y += iter.src_stride_y;
166 iter.src_row_u += iter.src_stride_u;
167 iter.src_row_v += iter.src_stride_v;
168 iter.y_index++;
171 static void YUVBuferIter_MoveToForI422(YUVBuferIter& iter, int y_index) {
172 iter.y_index = y_index;
173 iter.src_row_y = iter.src_y + y_index * iter.src_stride_y;
174 iter.src_row_u = iter.src_u + y_index * iter.src_stride_u;
175 iter.src_row_v = iter.src_v + y_index * iter.src_stride_v;
178 static void YUVBuferIter_MoveToNextRowForI422(YUVBuferIter& iter) {
179 iter.src_row_y += iter.src_stride_y;
180 iter.src_row_u += iter.src_stride_u;
181 iter.src_row_v += iter.src_stride_v;
182 iter.y_index++;
185 static void YUVBuferIter_MoveToForI420(YUVBuferIter& iter, int y_index) {
186 const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
187 int uv_y_index = y_index >> kYShift;
189 iter.y_index = y_index;
190 iter.src_row_y = iter.src_y + y_index * iter.src_stride_y;
191 iter.src_row_u = iter.src_u + uv_y_index * iter.src_stride_u;
192 iter.src_row_v = iter.src_v + uv_y_index * iter.src_stride_v;
195 static void YUVBuferIter_MoveToNextRowForI420(YUVBuferIter& iter) {
196 iter.src_row_y += iter.src_stride_y;
197 if (iter.y_index & 1) {
198 iter.src_row_u += iter.src_stride_u;
199 iter.src_row_v += iter.src_stride_v;
201 iter.y_index++;
204 static __inline void YUVBuferIter_ConvertToARGBRow(YUVBuferIter& iter, uint8_t* argb_row) {
205 iter.YUVToARGBRow(iter.src_row_y, iter.src_row_u, iter.src_row_v, argb_row, iter.yuvconstants, iter.src_width);
208 void YUVBuferIter_Init(YUVBuferIter& iter, uint32_t src_fourcc, YUVColorSpace yuv_color_space) {
209 iter.src_fourcc = src_fourcc;
210 iter.y_index = 0;
211 iter.src_row_y = iter.src_y;
212 iter.src_row_u = iter.src_u;
213 iter.src_row_v = iter.src_v;
214 switch (yuv_color_space) {
215 case YUVColorSpace::BT2020:
216 iter.yuvconstants = &kYuv2020Constants;
217 break;
218 case YUVColorSpace::BT709:
219 iter.yuvconstants = &kYuvH709Constants;
220 break;
221 default:
222 iter.yuvconstants = &kYuvI601Constants;
225 if (src_fourcc == FOURCC_I444) {
226 YUVBuferIter_InitI444(iter);
227 iter.MoveTo = YUVBuferIter_MoveToForI444;
228 iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI444;
229 } else if(src_fourcc == FOURCC_I422){
230 YUVBuferIter_InitI422(iter);
231 iter.MoveTo = YUVBuferIter_MoveToForI422;
232 iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI422;
233 } else {
234 assert(src_fourcc == FOURCC_I420); // Should be FOURCC_I420
235 YUVBuferIter_InitI422(iter);
236 iter.MoveTo = YUVBuferIter_MoveToForI420;
237 iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI420;
241 // ScaleARGB ARGB, 1/2
242 // This is an optimized version for scaling down a ARGB to 1/2 of
243 // its original size.
244 static void ScaleYUVToARGBDown2(int src_width, int src_height,
245 int dst_width, int dst_height,
246 int src_stride_y,
247 int src_stride_u,
248 int src_stride_v,
249 int dst_stride_argb,
250 const uint8_t* src_y,
251 const uint8_t* src_u,
252 const uint8_t* src_v,
253 uint8_t* dst_argb,
254 int x, int dx, int y, int dy,
255 enum FilterMode filtering,
256 uint32_t src_fourcc,
257 YUVColorSpace yuv_color_space) {
258 int j;
260 // Allocate 2 rows of ARGB for source conversion.
261 const int kRowSize = (src_width * 4 + 15) & ~15;
262 align_buffer_64(argb_cnv_row, kRowSize * 2);
263 uint8_t* argb_cnv_rowptr = argb_cnv_row;
264 int argb_cnv_rowstride = kRowSize;
266 YUVBuferIter iter;
267 iter.src_width = src_width;
268 iter.src_height = src_height;
269 iter.src_stride_y = src_stride_y;
270 iter.src_stride_u = src_stride_u;
271 iter.src_stride_v = src_stride_v;
272 iter.src_y = src_y;
273 iter.src_u = src_u;
274 iter.src_v = src_v;
275 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
277 void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
278 uint8_t* dst_argb, int dst_width) =
279 filtering == kFilterNone ? ScaleARGBRowDown2_C :
280 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
281 ScaleARGBRowDown2Box_C);
282 assert(dx == 65536 * 2); // Test scale factor of 2.
283 assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
284 // Advance to odd row, even column.
285 int yi = y >> 16;
286 iter.MoveTo(iter, yi);
287 ptrdiff_t x_offset;
288 if (filtering == kFilterBilinear) {
289 x_offset = (x >> 16) * 4;
290 } else {
291 x_offset = ((x >> 16) - 1) * 4;
293 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
294 if (TestCpuFlag(kCpuHasSSE2)) {
295 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_SSE2 :
296 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 :
297 ScaleARGBRowDown2Box_Any_SSE2);
298 if (IS_ALIGNED(dst_width, 4)) {
299 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
300 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
301 ScaleARGBRowDown2Box_SSE2);
305 #endif
306 #if defined(HAS_SCALEARGBROWDOWN2_NEON)
307 if (TestCpuFlag(kCpuHasNEON)) {
308 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_NEON :
309 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON :
310 ScaleARGBRowDown2Box_Any_NEON);
311 if (IS_ALIGNED(dst_width, 8)) {
312 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON :
313 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON :
314 ScaleARGBRowDown2Box_NEON);
317 #endif
319 const int dyi = dy >> 16;
320 int lastyi = yi;
321 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
322 // Prepare next row if necessary
323 if (filtering != kFilterLinear) {
324 if ((yi + dyi) < (src_height - 1)) {
325 iter.MoveTo(iter, yi + dyi);
326 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
327 } else {
328 argb_cnv_rowstride = 0;
332 if (filtering == kFilterLinear) {
333 argb_cnv_rowstride = 0;
335 const int max_yi = src_height - 1;
336 const int max_yi_minus_dyi = max_yi - dyi;
337 for (j = 0; j < dst_height; ++j) {
338 if (yi != lastyi) {
339 if (yi > max_yi) {
340 yi = max_yi;
342 if (yi != lastyi) {
343 if (filtering == kFilterLinear) {
344 iter.MoveTo(iter, yi);
345 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
346 lastyi = yi;
347 } else {
348 // Prepare current row
349 if (yi == iter.y_index) {
350 argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride;
351 argb_cnv_rowstride = - argb_cnv_rowstride;
352 } else {
353 iter.MoveTo(iter, yi);
354 argb_cnv_rowptr = argb_cnv_row;
355 argb_cnv_rowstride = kRowSize;
356 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
358 // Prepare next row if necessary
359 if (iter.y_index < max_yi) {
360 int next_yi = yi < max_yi_minus_dyi ? yi + dyi : max_yi;
361 iter.MoveTo(iter, next_yi);
362 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
363 } else {
364 argb_cnv_rowstride = 0;
366 lastyi = yi;
370 ScaleARGBRowDown2(argb_cnv_rowptr + x_offset, argb_cnv_rowstride, dst_argb, dst_width);
371 dst_argb += dst_stride_argb;
372 yi += dyi;
375 free_aligned_buffer_64(argb_cnv_row);
378 // ScaleARGB ARGB Even
379 // This is an optimized version for scaling down a ARGB to even
380 // multiple of its original size.
381 static void ScaleYUVToARGBDownEven(int src_width, int src_height,
382 int dst_width, int dst_height,
383 int src_stride_y,
384 int src_stride_u,
385 int src_stride_v,
386 int dst_stride_argb,
387 const uint8_t* src_y,
388 const uint8_t* src_u,
389 const uint8_t* src_v,
390 uint8_t* dst_argb,
391 int x, int dx, int y, int dy,
392 enum FilterMode filtering,
393 uint32_t src_fourcc,
394 YUVColorSpace yuv_color_space) {
395 int j;
396 // Allocate 2 rows of ARGB for source conversion.
397 const int kRowSize = (src_width * 4 + 15) & ~15;
398 align_buffer_64(argb_cnv_row, kRowSize * 2);
399 uint8_t* argb_cnv_rowptr = argb_cnv_row;
400 int argb_cnv_rowstride = kRowSize;
402 int col_step = dx >> 16;
403 void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
404 int src_step, uint8_t* dst_argb, int dst_width) =
405 filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
406 assert(IS_ALIGNED(src_width, 2));
407 assert(IS_ALIGNED(src_height, 2));
408 int yi = y >> 16;
409 const ptrdiff_t x_offset = (x >> 16) * 4;
411 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
412 if (TestCpuFlag(kCpuHasSSE2)) {
413 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 :
414 ScaleARGBRowDownEven_Any_SSE2;
415 if (IS_ALIGNED(dst_width, 4)) {
416 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
417 ScaleARGBRowDownEven_SSE2;
420 #endif
421 #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
422 if (TestCpuFlag(kCpuHasNEON)) {
423 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON :
424 ScaleARGBRowDownEven_Any_NEON;
425 if (IS_ALIGNED(dst_width, 4)) {
426 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
427 ScaleARGBRowDownEven_NEON;
430 #endif
432 YUVBuferIter iter;
433 iter.src_width = src_width;
434 iter.src_height = src_height;
435 iter.src_stride_y = src_stride_y;
436 iter.src_stride_u = src_stride_u;
437 iter.src_stride_v = src_stride_v;
438 iter.src_y = src_y;
439 iter.src_u = src_u;
440 iter.src_v = src_v;
441 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
443 const int dyi = dy >> 16;
444 int lastyi = yi;
445 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
446 // Prepare next row if necessary
447 if (filtering != kFilterLinear) {
448 if ((yi + dyi) < (src_height - 1)) {
449 iter.MoveTo(iter, yi + dyi);
450 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
451 } else {
452 argb_cnv_rowstride = 0;
456 if (filtering == kFilterLinear) {
457 argb_cnv_rowstride = 0;
459 const int max_yi = src_height - 1;
460 const int max_yi_minus_dyi = max_yi - dyi;
461 for (j = 0; j < dst_height; ++j) {
462 if (yi != lastyi) {
463 if (yi > max_yi) {
464 yi = max_yi;
466 if (yi != lastyi) {
467 if (filtering == kFilterLinear) {
468 iter.MoveTo(iter, yi);
469 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
470 lastyi = yi;
471 } else {
472 // Prepare current row
473 if (yi == iter.y_index) {
474 argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride;
475 argb_cnv_rowstride = - argb_cnv_rowstride;
476 } else {
477 iter.MoveTo(iter, yi);
478 argb_cnv_rowptr = argb_cnv_row;
479 argb_cnv_rowstride = kRowSize;
480 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
482 // Prepare next row if necessary
483 if (iter.y_index < max_yi) {
484 int next_yi = yi < max_yi_minus_dyi ? yi + dyi : max_yi;
485 iter.MoveTo(iter, next_yi);
486 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
487 } else {
488 argb_cnv_rowstride = 0;
490 lastyi = yi;
494 ScaleARGBRowDownEven(argb_cnv_rowptr + x_offset, argb_cnv_rowstride, col_step, dst_argb, dst_width);
495 dst_argb += dst_stride_argb;
496 yi += dyi;
498 free_aligned_buffer_64(argb_cnv_row);
501 // Scale YUV to ARGB down with bilinear interpolation.
502 static void ScaleYUVToARGBBilinearDown(int src_width, int src_height,
503 int dst_width, int dst_height,
504 int src_stride_y,
505 int src_stride_u,
506 int src_stride_v,
507 int dst_stride_argb,
508 const uint8_t* src_y,
509 const uint8_t* src_u,
510 const uint8_t* src_v,
511 uint8_t* dst_argb,
512 int x, int dx, int y, int dy,
513 enum FilterMode filtering,
514 uint32_t src_fourcc,
515 YUVColorSpace yuv_color_space) {
516 int j;
517 void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
518 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
519 InterpolateRow_C;
520 void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
521 int dst_width, int x, int dx) =
522 (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
523 int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
524 int64_t xl = (dx >= 0) ? x : xlast;
525 int64_t xr = (dx >= 0) ? xlast : x;
526 int clip_src_width;
527 xl = (xl >> 16) & ~3; // Left edge aligned.
528 xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
529 xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
530 if (xr > src_width) {
531 xr = src_width;
533 clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
534 const ptrdiff_t xl_offset = xl * 4;
535 x -= (int)(xl << 16);
537 // Allocate 2 row of ARGB for source conversion.
538 const int kRowSize = (src_width * 4 + 15) & ~15;
539 align_buffer_64(argb_cnv_row, kRowSize * 2);
540 uint8_t* argb_cnv_rowptr = argb_cnv_row;
541 int argb_cnv_rowstride = kRowSize;
543 #if defined(HAS_INTERPOLATEROW_SSSE3)
544 if (TestCpuFlag(kCpuHasSSSE3)) {
545 InterpolateRow = InterpolateRow_Any_SSSE3;
546 if (IS_ALIGNED(clip_src_width, 16)) {
547 InterpolateRow = InterpolateRow_SSSE3;
550 #endif
551 #if defined(HAS_INTERPOLATEROW_AVX2)
552 if (TestCpuFlag(kCpuHasAVX2)) {
553 InterpolateRow = InterpolateRow_Any_AVX2;
554 if (IS_ALIGNED(clip_src_width, 32)) {
555 InterpolateRow = InterpolateRow_AVX2;
558 #endif
559 #if defined(HAS_INTERPOLATEROW_NEON)
560 if (TestCpuFlag(kCpuHasNEON)) {
561 InterpolateRow = InterpolateRow_Any_NEON;
562 if (IS_ALIGNED(clip_src_width, 16)) {
563 InterpolateRow = InterpolateRow_NEON;
566 #endif
567 #if defined(HAS_INTERPOLATEROW_DSPR2)
568 if (TestCpuFlag(kCpuHasDSPR2) &&
569 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(argb_cnv_rowstride, 4)) {
570 InterpolateRow = InterpolateRow_Any_DSPR2;
571 if (IS_ALIGNED(clip_src_width, 4)) {
572 InterpolateRow = InterpolateRow_DSPR2;
575 #endif
576 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
577 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
578 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
580 #endif
581 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
582 if (TestCpuFlag(kCpuHasNEON)) {
583 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
584 if (IS_ALIGNED(dst_width, 4)) {
585 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
588 #endif
590 int yi = y >> 16;
592 YUVBuferIter iter;
593 iter.src_width = src_width;
594 iter.src_height = src_height;
595 iter.src_stride_y = src_stride_y;
596 iter.src_stride_u = src_stride_u;
597 iter.src_stride_v = src_stride_v;
598 iter.src_y = src_y;
599 iter.src_u = src_u;
600 iter.src_v = src_v;
601 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
602 iter.MoveTo(iter, yi);
604 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
605 // Allocate a row of ARGB.
606 align_buffer_64(row, clip_src_width * 4);
608 int lastyi = yi;
609 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
610 // Prepare next row if necessary
611 if (filtering != kFilterLinear) {
612 if ((yi + 1) < src_height) {
613 iter.MoveToNextRow(iter);
614 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
615 } else {
616 argb_cnv_rowstride = 0;
620 const int max_y = (src_height - 1) << 16;
621 const int max_yi = src_height - 1;
622 for (j = 0; j < dst_height; ++j) {
623 yi = y >> 16;
624 if (yi != lastyi) {
625 if (y > max_y) {
626 y = max_y;
627 yi = y >> 16;
629 if (yi != lastyi) {
630 if (filtering == kFilterLinear) {
631 iter.MoveTo(iter, yi);
632 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
633 lastyi = yi;
634 } else {
635 // Prepare current row
636 if (yi == iter.y_index) {
637 argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride;
638 argb_cnv_rowstride = - argb_cnv_rowstride;
639 } else {
640 iter.MoveTo(iter, yi);
641 argb_cnv_rowptr = argb_cnv_row;
642 argb_cnv_rowstride = kRowSize;
643 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
645 // Prepare next row if necessary
646 if (iter.y_index < max_yi) {
647 iter.MoveToNextRow(iter);
648 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
649 } else {
650 argb_cnv_rowstride = 0;
652 lastyi = yi;
656 if (filtering == kFilterLinear) {
657 ScaleARGBFilterCols(dst_argb, argb_cnv_rowptr + xl_offset, dst_width, x, dx);
658 } else {
659 int yf = (y >> 8) & 255;
660 InterpolateRow(row, argb_cnv_rowptr + xl_offset, argb_cnv_rowstride, clip_src_width, yf);
661 ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
663 dst_argb += dst_stride_argb;
664 y += dy;
666 free_aligned_buffer_64(row);
667 free_aligned_buffer_64(argb_cnv_row);
670 // Scale YUV to ARGB up with bilinear interpolation.
671 static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
672 int dst_width, int dst_height,
673 int src_stride_y,
674 int src_stride_u,
675 int src_stride_v,
676 int dst_stride_argb,
677 const uint8_t* src_y,
678 const uint8_t* src_u,
679 const uint8_t* src_v,
680 uint8_t* dst_argb,
681 int x, int dx, int y, int dy,
682 enum FilterMode filtering,
683 uint32_t src_fourcc,
684 YUVColorSpace yuv_color_space) {
685 int j;
686 void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
687 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
688 InterpolateRow_C;
689 void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
690 int dst_width, int x, int dx) =
691 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
692 const int max_y = (src_height - 1) << 16;
694 // Allocate 1 row of ARGB for source conversion.
695 align_buffer_64(argb_cnv_row, src_width * 4);
697 #if defined(HAS_INTERPOLATEROW_SSSE3)
698 if (TestCpuFlag(kCpuHasSSSE3)) {
699 InterpolateRow = InterpolateRow_Any_SSSE3;
700 if (IS_ALIGNED(dst_width, 4)) {
701 InterpolateRow = InterpolateRow_SSSE3;
704 #endif
705 #if defined(HAS_INTERPOLATEROW_AVX2)
706 if (TestCpuFlag(kCpuHasAVX2)) {
707 InterpolateRow = InterpolateRow_Any_AVX2;
708 if (IS_ALIGNED(dst_width, 8)) {
709 InterpolateRow = InterpolateRow_AVX2;
712 #endif
713 #if defined(HAS_INTERPOLATEROW_NEON)
714 if (TestCpuFlag(kCpuHasNEON)) {
715 InterpolateRow = InterpolateRow_Any_NEON;
716 if (IS_ALIGNED(dst_width, 4)) {
717 InterpolateRow = InterpolateRow_NEON;
720 #endif
721 #if defined(HAS_INTERPOLATEROW_DSPR2)
722 if (TestCpuFlag(kCpuHasDSPR2) &&
723 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
724 InterpolateRow = InterpolateRow_DSPR2;
726 #endif
727 if (src_width >= 32768) {
728 ScaleARGBFilterCols = filtering ?
729 ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
731 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
732 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
733 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
735 #endif
736 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
737 if (filtering && TestCpuFlag(kCpuHasNEON)) {
738 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
739 if (IS_ALIGNED(dst_width, 4)) {
740 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
743 #endif
744 #if defined(HAS_SCALEARGBCOLS_SSE2)
745 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
746 ScaleARGBFilterCols = ScaleARGBCols_SSE2;
748 #endif
749 #if defined(HAS_SCALEARGBCOLS_NEON)
750 if (!filtering && TestCpuFlag(kCpuHasNEON)) {
751 ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
752 if (IS_ALIGNED(dst_width, 8)) {
753 ScaleARGBFilterCols = ScaleARGBCols_NEON;
756 #endif
757 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
758 ScaleARGBFilterCols = ScaleARGBColsUp2_C;
759 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
760 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
761 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
763 #endif
766 if (y > max_y) {
767 y = max_y;
770 int yi = y >> 16;
772 YUVBuferIter iter;
773 iter.src_width = src_width;
774 iter.src_height = src_height;
775 iter.src_stride_y = src_stride_y;
776 iter.src_stride_u = src_stride_u;
777 iter.src_stride_v = src_stride_v;
778 iter.src_y = src_y;
779 iter.src_u = src_u;
780 iter.src_v = src_v;
781 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
782 iter.MoveTo(iter, yi);
784 // Allocate 2 rows of ARGB.
785 const int kRowSize = (dst_width * 4 + 15) & ~15;
786 align_buffer_64(row, kRowSize * 2);
788 uint8_t* rowptr = row;
789 int rowstride = kRowSize;
790 int lastyi = yi;
792 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
793 ScaleARGBFilterCols(rowptr, argb_cnv_row, dst_width, x, dx);
795 if (filtering == kFilterLinear) {
796 rowstride = 0;
798 // Prepare next row if necessary
799 if (filtering != kFilterLinear) {
800 if ((yi + 1) < src_height) {
801 iter.MoveToNextRow(iter);
802 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
803 ScaleARGBFilterCols(rowptr + rowstride, argb_cnv_row, dst_width, x, dx);
804 }else {
805 rowstride = 0;
809 const int max_yi = src_height - 1;
810 for (j = 0; j < dst_height; ++j) {
811 yi = y >> 16;
812 if (yi != lastyi) {
813 if (y > max_y) {
814 y = max_y;
815 yi = y >> 16;
817 if (yi != lastyi) {
818 if (filtering == kFilterLinear) {
819 iter.MoveToNextRow(iter);
820 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
821 ScaleARGBFilterCols(rowptr, argb_cnv_row, dst_width, x, dx);
822 } else {
823 // Prepare next row if necessary
824 if (yi < max_yi) {
825 iter.MoveToNextRow(iter);
826 rowptr += rowstride;
827 rowstride = -rowstride;
828 // TODO(fbarchard): Convert the clipped region of row.
829 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
830 ScaleARGBFilterCols(rowptr + rowstride, argb_cnv_row, dst_width, x, dx);
831 } else {
832 rowstride = 0;
835 lastyi = yi;
838 if (filtering == kFilterLinear) {
839 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
840 } else {
841 int yf = (y >> 8) & 255;
842 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
844 dst_argb += dst_stride_argb;
845 y += dy;
847 free_aligned_buffer_64(row);
848 free_aligned_buffer_64(argb_cnv_row);
851 // Scale ARGB to/from any dimensions, without interpolation.
852 // Fixed point math is used for performance: The upper 16 bits
853 // of x and dx is the integer part of the source position and
854 // the lower 16 bits are the fixed decimal part.
856 static void ScaleYUVToARGBSimple(int src_width, int src_height,
857 int dst_width, int dst_height,
858 int src_stride_y,
859 int src_stride_u,
860 int src_stride_v,
861 int dst_stride_argb,
862 const uint8_t* src_y,
863 const uint8_t* src_u,
864 const uint8_t* src_v,
865 uint8_t* dst_argb,
866 int x, int dx, int y, int dy,
867 uint32_t src_fourcc,
868 YUVColorSpace yuv_color_space) {
869 int j;
870 void (*ScaleARGBCols)(uint8_t* dst_argb, const uint8_t* src_argb,
871 int dst_width, int x, int dx) =
872 (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
874 // Allocate 1 row of ARGB for source conversion.
875 align_buffer_64(argb_cnv_row, src_width * 4);
877 #if defined(HAS_SCALEARGBCOLS_SSE2)
878 if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
879 ScaleARGBCols = ScaleARGBCols_SSE2;
881 #endif
882 #if defined(HAS_SCALEARGBCOLS_NEON)
883 if (TestCpuFlag(kCpuHasNEON)) {
884 ScaleARGBCols = ScaleARGBCols_Any_NEON;
885 if (IS_ALIGNED(dst_width, 8)) {
886 ScaleARGBCols = ScaleARGBCols_NEON;
889 #endif
890 if (src_width * 2 == dst_width && x < 0x8000) {
891 ScaleARGBCols = ScaleARGBColsUp2_C;
892 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
893 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
894 ScaleARGBCols = ScaleARGBColsUp2_SSE2;
896 #endif
899 int yi = y >> 16;
901 YUVBuferIter iter;
902 iter.src_width = src_width;
903 iter.src_height = src_height;
904 iter.src_stride_y = src_stride_y;
905 iter.src_stride_u = src_stride_u;
906 iter.src_stride_v = src_stride_v;
907 iter.src_y = src_y;
908 iter.src_u = src_u;
909 iter.src_v = src_v;
910 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
911 iter.MoveTo(iter, yi);
913 int lasty = yi;
914 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
916 for (j = 0; j < dst_height; ++j) {
917 yi = y >> 16;
918 if (yi != lasty) {
919 iter.MoveTo(iter, yi);
920 YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
921 lasty = yi;
923 ScaleARGBCols(dst_argb, argb_cnv_row, dst_width, x, dx);
924 dst_argb += dst_stride_argb;
925 y += dy;
927 free_aligned_buffer_64(argb_cnv_row);
930 static void YUVToARGBCopy(const uint8_t* src_y, int src_stride_y,
931 const uint8_t* src_u, int src_stride_u,
932 const uint8_t* src_v, int src_stride_v,
933 int src_width, int src_height,
934 uint8_t* dst_argb, int dst_stride_argb,
935 int dst_width, int dst_height,
936 uint32_t src_fourcc,
937 YUVColorSpace yuv_color_space)
939 YUVBuferIter iter;
940 iter.src_width = src_width;
941 iter.src_height = src_height;
942 iter.src_stride_y = src_stride_y;
943 iter.src_stride_u = src_stride_u;
944 iter.src_stride_v = src_stride_v;
945 iter.src_y = src_y;
946 iter.src_u = src_u;
947 iter.src_v = src_v;
948 YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
950 for (int j = 0; j < dst_height; ++j) {
951 YUVBuferIter_ConvertToARGBRow(iter, dst_argb);
952 iter.MoveToNextRow(iter);
953 dst_argb += dst_stride_argb;
957 static void ScaleYUVToARGB(const uint8_t* src_y, int src_stride_y,
958 const uint8_t* src_u, int src_stride_u,
959 const uint8_t* src_v, int src_stride_v,
960 int src_width, int src_height,
961 uint8_t* dst_argb, int dst_stride_argb,
962 int dst_width, int dst_height,
963 enum FilterMode filtering,
964 uint32_t src_fourcc,
965 YUVColorSpace yuv_color_space)
967 // Initial source x/y coordinate and step values as 16.16 fixed point.
968 int x = 0;
969 int y = 0;
970 int dx = 0;
971 int dy = 0;
972 // ARGB does not support box filter yet, but allow the user to pass it.
973 // Simplify filtering when possible.
974 filtering = ScaleFilterReduce(src_width, src_height,
975 dst_width, dst_height,
976 filtering);
977 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
978 &x, &y, &dx, &dy);
980 // Special case for integer step values.
981 if (((dx | dy) & 0xffff) == 0) {
982 if (!dx || !dy) { // 1 pixel wide and/or tall.
983 filtering = kFilterNone;
984 } else {
985 // Optimized even scale down. ie 2, 4, 6, 8, 10x.
986 if (!(dx & 0x10000) && !(dy & 0x10000)) {
987 if (dx == 0x20000) {
988 // Optimized 1/2 downsample.
989 ScaleYUVToARGBDown2(src_width, src_height,
990 dst_width, dst_height,
991 src_stride_y,
992 src_stride_u,
993 src_stride_v,
994 dst_stride_argb,
995 src_y,
996 src_u,
997 src_v,
998 dst_argb,
999 x, dx, y, dy,
1000 filtering,
1001 src_fourcc,
1002 yuv_color_space);
1003 return;
1005 ScaleYUVToARGBDownEven(src_width, src_height,
1006 dst_width, dst_height,
1007 src_stride_y,
1008 src_stride_u,
1009 src_stride_v,
1010 dst_stride_argb,
1011 src_y,
1012 src_u,
1013 src_v,
1014 dst_argb,
1015 x, dx, y, dy,
1016 filtering,
1017 src_fourcc,
1018 yuv_color_space);
1019 return;
1021 // Optimized odd scale down. ie 3, 5, 7, 9x.
1022 if ((dx & 0x10000) && (dy & 0x10000)) {
1023 filtering = kFilterNone;
1024 if (dx == 0x10000 && dy == 0x10000) {
1025 // Straight conversion and copy.
1026 YUVToARGBCopy(src_y, src_stride_y,
1027 src_u, src_stride_u,
1028 src_v, src_stride_v,
1029 src_width, src_height,
1030 dst_argb, dst_stride_argb,
1031 dst_width, dst_height,
1032 src_fourcc,
1033 yuv_color_space);
1034 return;
1039 if (filtering && dy < 65536) {
1040 ScaleYUVToARGBBilinearUp(src_width, src_height,
1041 dst_width, dst_height,
1042 src_stride_y,
1043 src_stride_u,
1044 src_stride_v,
1045 dst_stride_argb,
1046 src_y,
1047 src_u,
1048 src_v,
1049 dst_argb,
1050 x, dx, y, dy,
1051 filtering,
1052 src_fourcc,
1053 yuv_color_space);
1054 return;
1056 if (filtering) {
1057 ScaleYUVToARGBBilinearDown(src_width, src_height,
1058 dst_width, dst_height,
1059 src_stride_y,
1060 src_stride_u,
1061 src_stride_v,
1062 dst_stride_argb,
1063 src_y,
1064 src_u,
1065 src_v,
1066 dst_argb,
1067 x, dx, y, dy,
1068 filtering,
1069 src_fourcc,
1070 yuv_color_space);
1071 return;
1073 ScaleYUVToARGBSimple(src_width, src_height,
1074 dst_width, dst_height,
1075 src_stride_y,
1076 src_stride_u,
1077 src_stride_v,
1078 dst_stride_argb,
1079 src_y,
1080 src_u,
1081 src_v,
1082 dst_argb,
1083 x, dx, y, dy,
1084 src_fourcc,
1085 yuv_color_space);
1088 bool IsConvertSupported(uint32_t src_fourcc)
1090 if (src_fourcc == FOURCC_I444 ||
1091 src_fourcc == FOURCC_I422 ||
1092 src_fourcc == FOURCC_I420) {
1093 return true;
1095 return false;
1098 LIBYUV_API
1099 int YUVToARGBScale(const uint8_t* src_y, int src_stride_y,
1100 const uint8_t* src_u, int src_stride_u,
1101 const uint8_t* src_v, int src_stride_v,
1102 uint32_t src_fourcc,
1103 YUVColorSpace yuv_color_space,
1104 int src_width, int src_height,
1105 uint8_t* dst_argb, int dst_stride_argb,
1106 int dst_width, int dst_height,
1107 enum FilterMode filtering)
1109 if (!src_y || !src_u || !src_v ||
1110 src_width == 0 || src_height == 0 ||
1111 !dst_argb || dst_width <= 0 || dst_height <= 0) {
1112 return -1;
1114 if (!IsConvertSupported(src_fourcc)) {
1115 return -1;
1117 ScaleYUVToARGB(src_y, src_stride_y,
1118 src_u, src_stride_u,
1119 src_v, src_stride_v,
1120 src_width, src_height,
1121 dst_argb, dst_stride_argb,
1122 dst_width, dst_height,
1123 filtering,
1124 src_fourcc,
1125 yuv_color_space);
1126 return 0;
1129 #ifdef __cplusplus
1130 } // extern "C"
1131 } // namespace libyuv
1132 #endif