1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
11 // FilterRows combines two rows of the image using linear interpolation.
12 // SSE2 version does 16 pixels at a time.
//
// For each byte of the two input rows the visible arithmetic computes
//   (y0 * (256 - source_y_fraction) + y1 * source_y_fraction) >> 8
// in 16-bit lanes and then packs the results back to bytes.
//
// Parameters:
//   ybuf              - destination row; also used with source_width to
//                       derive the loop's end pointer.
//   y0_ptr, y1_ptr    - the two source rows; read with unaligned 128-bit
//                       loads, so no 16-byte alignment is required for them.
//   source_width      - number of bytes between ybuf and the end pointer.
//   source_y_fraction - weight applied to the y1 row; the y0 row gets
//                       256 - source_y_fraction.
//                       NOTE(review): assumes 0 <= source_y_fraction <= 256
//                       so the sum of the two 16-bit products (at most
//                       255 * 256) cannot wrap - confirm against callers.
13 void FilterRows_SSE2(uint8_t* ybuf
, const uint8_t* y0_ptr
, const uint8_t* y1_ptr
,
14 int source_width
, int source_y_fraction
) {
// All-zero register, used below as the high bytes when widening 8-bit
// values to 16-bit lanes.
15 __m128i zero
= _mm_setzero_si128();
// Broadcast the two blend weights into every 16-bit lane.
16 __m128i y1_fraction
= _mm_set1_epi16(source_y_fraction
);
17 __m128i y0_fraction
= _mm_set1_epi16(256 - source_y_fraction
);
// View the byte rows as 128-bit vectors (16 bytes per element).
19 const __m128i
* y0_ptr128
= reinterpret_cast<const __m128i
*>(y0_ptr
);
20 const __m128i
* y1_ptr128
= reinterpret_cast<const __m128i
*>(y1_ptr
);
21 __m128i
* dest128
= reinterpret_cast<__m128i
*>(ybuf
);
// Loop bound: the address source_width bytes past the start of ybuf.
22 __m128i
* end128
= reinterpret_cast<__m128i
*>(ybuf
+ source_width
);
// Load 16 bytes from each source row (unaligned loads).
25 __m128i y0
= _mm_loadu_si128(y0_ptr128
);
26 __m128i y1
= _mm_loadu_si128(y1_ptr128
);
// Widen the upper 8 bytes of each row to 16-bit lanes by interleaving
// with zero; y2/y3 hold the high halves of y0/y1 respectively.
27 __m128i y2
= _mm_unpackhi_epi8(y0
, zero
);
28 __m128i y3
= _mm_unpackhi_epi8(y1
, zero
);
// Widen the lower 8 bytes in place; y0/y1 now hold the low halves.
29 y0
= _mm_unpacklo_epi8(y0
, zero
);
30 y1
= _mm_unpacklo_epi8(y1
, zero
);
// Scale each row by its weight, keeping the low 16 bits of each product.
31 y0
= _mm_mullo_epi16(y0
, y0_fraction
);
32 y1
= _mm_mullo_epi16(y1
, y1_fraction
);
33 y2
= _mm_mullo_epi16(y2
, y0_fraction
);
34 y3
= _mm_mullo_epi16(y3
, y1_fraction
);
// Sum the weighted rows: y0 = low-half result, y2 = high-half result.
35 y0
= _mm_add_epi16(y0
, y1
);
36 y2
= _mm_add_epi16(y2
, y3
);
// Logical shift right by 8 divides by 256, undoing the weight scale.
37 y0
= _mm_srli_epi16(y0
, 8);
38 y2
= _mm_srli_epi16(y2
, 8);
// Narrow back to bytes with unsigned saturation; y0 supplies the low
// 8 bytes and y2 the high 8 bytes of the 16-byte result.
39 y0
= _mm_packus_epi16(y0
, y2
);
// Repeat while the destination pointer is below the end pointer.
// NOTE(review): this excerpt shows neither the opening `do {` nor any
// store of y0 through dest128 / advance of the three pointers; presumably
// those lines exist in the full file - confirm before relying on this.
43 } while (dest128
< end128
);
47 } // namespace mozilla