1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
// FilterRows combines two rows of the image using linear interpolation.
// MMX version does 8 pixels at a time.
//
// For every byte i that is processed the result is
//   ybuf[i] = (y0_ptr[i] * (256 - source_y_fraction) +
//              y1_ptr[i] * source_y_fraction) >> 8
//
// Parameters:
//   ybuf              - destination row.
//   y0_ptr, y1_ptr    - the two source rows being blended.
//   source_width      - number of bytes to produce; see the rounding note.
//   source_y_fraction - weight of the y1 row out of 256. Assumed to lie in
//                       [0, 256]; with such weights the 16-bit sums below
//                       never exceed 255 * 256 and cannot wrap.
//                       TODO: confirm the range against the callers.
//
// Notes:
//   * The loop is a do/while that handles 8 bytes per pass, so at least 8
//     bytes are always read and written, and the amount touched is
//     source_width rounded up to a multiple of 8. All three buffers must be
//     large enough for that.
//   * The rows are accessed through __m64 pointers. NOTE(review): callers
//     presumably supply suitably aligned buffers - confirm.
//   * This function does not execute _mm_empty(); callers that use x87
//     floating point afterwards are expected to clear the MMX state
//     themselves.
void FilterRows_MMX(uint8_t* ybuf, const uint8_t* y0_ptr, const uint8_t* y1_ptr,
                    int source_width, int source_y_fraction) {
  const __m64 zero = _mm_setzero_si64();
  // Broadcast both blend weights into four 16-bit lanes each.
  const __m64 y1_fraction =
      _mm_set1_pi16(static_cast<short>(source_y_fraction));
  const __m64 y0_fraction =
      _mm_set1_pi16(static_cast<short>(256 - source_y_fraction));

  const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr);
  const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr);
  __m64* dest64 = reinterpret_cast<__m64*>(ybuf);
  __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width);

  do {
    // Load 8 bytes from each source row.
    __m64 y0 = *y0_ptr64++;
    __m64 y1 = *y1_ptr64++;

    // Widen bytes to 16-bit lanes: y2/y3 take the upper four bytes,
    // y0/y1 keep the lower four.
    __m64 y2 = _mm_unpackhi_pi8(y0, zero);
    __m64 y3 = _mm_unpackhi_pi8(y1, zero);
    y0 = _mm_unpacklo_pi8(y0, zero);
    y1 = _mm_unpacklo_pi8(y1, zero);

    // Apply the weights.
    y0 = _mm_mullo_pi16(y0, y0_fraction);
    y1 = _mm_mullo_pi16(y1, y1_fraction);
    y2 = _mm_mullo_pi16(y2, y0_fraction);
    y3 = _mm_mullo_pi16(y3, y1_fraction);

    // Sum the weighted rows and scale back down by 256.
    y0 = _mm_add_pi16(y0, y1);
    y2 = _mm_add_pi16(y2, y3);
    y0 = _mm_srli_pi16(y0, 8);
    y2 = _mm_srli_pi16(y2, 8);

    // Narrow back to 8 bytes and store the blended group.
    y0 = _mm_packs_pu16(y0, y2);
    *dest64++ = y0;
  } while (dest64 < end64);
}
45 } // namespace mozilla