1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
6 #include "mozilla/SSE.h"
12 #if defined(ARCH_CPU_X86_64)
14 // We don't need CPUID guards here, since x86-64 implies SSE2.
16 // AMD64 ABI uses register parameters.
17 void FastConvertYUVToRGB32Row(const uint8_t* y_buf
, // rdi
18 const uint8_t* u_buf
, // rsi
19 const uint8_t* v_buf
, // rdx
20 uint8_t* rgb_buf
, // rcx
25 "movzb (%[u_buf]),%%r10\n"
27 "movzb (%[v_buf]),%%r11\n"
29 "movq 2048(%[kCoefficientsRgbY],%%r10,8),%%xmm0\n"
30 "movzb (%[y_buf]),%%r10\n"
31 "movq 4096(%[kCoefficientsRgbY],%%r11,8),%%xmm1\n"
32 "movzb 0x1(%[y_buf]),%%r11\n"
33 "paddsw %%xmm1,%%xmm0\n"
34 "movq (%[kCoefficientsRgbY],%%r10,8),%%xmm2\n"
36 "movq (%[kCoefficientsRgbY],%%r11,8),%%xmm3\n"
37 "paddsw %%xmm0,%%xmm2\n"
38 "paddsw %%xmm0,%%xmm3\n"
39 "shufps $0x44,%%xmm3,%%xmm2\n"
41 "packuswb %%xmm2,%%xmm2\n"
42 "movq %%xmm2,0x0(%[rgb_buf])\n"
43 "add $0x8,%[rgb_buf]\n"
52 "movzb (%[u_buf]),%%r10\n"
53 "movq 2048(%[kCoefficientsRgbY],%%r10,8),%%xmm0\n"
54 "movzb (%[v_buf]),%%r10\n"
55 "movq 4096(%[kCoefficientsRgbY],%%r10,8),%%xmm1\n"
56 "paddsw %%xmm1,%%xmm0\n"
57 "movzb (%[y_buf]),%%r10\n"
58 "movq (%[kCoefficientsRgbY],%%r10,8),%%xmm1\n"
59 "paddsw %%xmm0,%%xmm1\n"
61 "packuswb %%xmm1,%%xmm1\n"
62 "movd %%xmm1,0x0(%[rgb_buf])\n"
64 : [y_buf
] "+r"(y_buf
),
67 [rgb_buf
] "+r"(rgb_buf
),
69 : [kCoefficientsRgbY
] "r" (kCoefficientsRgbY
)
70 : "cc", "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
74 void ScaleYUVToRGB32Row(const uint8_t* y_buf
, // rdi
75 const uint8_t* u_buf
, // rsi
76 const uint8_t* v_buf
, // rdx
77 uint8_t* rgb_buf
, // rcx
79 int source_dx
) { // r9
88 "movzb (%[u_buf],%%r10,1),%%rax\n"
89 "movq 2048(%[kCoefficientsRgbY],%%rax,8),%%xmm0\n"
90 "movzb (%[v_buf],%%r10,1),%%rax\n"
91 "movq 4096(%[kCoefficientsRgbY],%%rax,8),%%xmm1\n"
92 "lea (%%r11,%[source_dx]),%%r10\n"
94 "movzb (%[y_buf],%%r11,1),%%rax\n"
95 "paddsw %%xmm1,%%xmm0\n"
96 "movq (%[kCoefficientsRgbY],%%rax,8),%%xmm1\n"
97 "lea (%%r10,%[source_dx]),%%r11\n"
99 "movzb (%[y_buf],%%r10,1),%%rax\n"
100 "movq (%[kCoefficientsRgbY],%%rax,8),%%xmm2\n"
101 "paddsw %%xmm0,%%xmm1\n"
102 "paddsw %%xmm0,%%xmm2\n"
103 "shufps $0x44,%%xmm2,%%xmm1\n"
104 "psraw $0x6,%%xmm1\n"
105 "packuswb %%xmm1,%%xmm1\n"
106 "movq %%xmm1,0x0(%[rgb_buf])\n"
107 "add $0x8,%[rgb_buf]\n"
108 "sub $0x2,%[width]\n"
112 "add $0x1,%[width]\n"
117 "movzb (%[u_buf],%%r10,1),%%rax\n"
118 "movq 2048(%[kCoefficientsRgbY],%%rax,8),%%xmm0\n"
119 "movzb (%[v_buf],%%r10,1),%%rax\n"
120 "movq 4096(%[kCoefficientsRgbY],%%rax,8),%%xmm1\n"
121 "paddsw %%xmm1,%%xmm0\n"
123 "movzb (%[y_buf],%%r11,1),%%rax\n"
124 "movq (%[kCoefficientsRgbY],%%rax,8),%%xmm1\n"
125 "paddsw %%xmm0,%%xmm1\n"
126 "psraw $0x6,%%xmm1\n"
127 "packuswb %%xmm1,%%xmm1\n"
128 "movd %%xmm1,0x0(%[rgb_buf])\n"
131 : [rgb_buf
] "+r"(rgb_buf
),
133 : [y_buf
] "r"(y_buf
),
136 [kCoefficientsRgbY
] "r" (kCoefficientsRgbY
),
137 [source_dx
] "r"(static_cast<long>(source_dx
))
138 : "cc", "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
142 void LinearScaleYUVToRGB32Row(const uint8_t* y_buf
,
143 const uint8_t* u_buf
,
144 const uint8_t* v_buf
,
149 "xor %%r11,%%r11\n" // x = 0
150 "sub $0x2,%[width]\n"
152 "cmp $0x20000,%[source_dx]\n" // if source_dx >= 2.0
154 "mov $0x8000,%%r11\n" // x = 0.5 for 1/2 or less
161 "movzb (%[u_buf], %%r10, 1), %%r13 \n"
162 "movzb 1(%[u_buf], %%r10, 1), %%r14 \n"
163 "mov %%r11, %%rax \n"
164 "and $0x1fffe, %%rax \n"
165 "imul %%rax, %%r14 \n"
166 "xor $0x1fffe, %%rax \n"
167 "imul %%rax, %%r13 \n"
168 "add %%r14, %%r13 \n"
170 "movq 2048(%[kCoefficientsRgbY],%%r13,8), %%xmm0\n"
172 "movzb (%[v_buf], %%r10, 1), %%r13 \n"
173 "movzb 1(%[v_buf], %%r10, 1), %%r14 \n"
174 "mov %%r11, %%rax \n"
175 "and $0x1fffe, %%rax \n"
176 "imul %%rax, %%r14 \n"
177 "xor $0x1fffe, %%rax \n"
178 "imul %%rax, %%r13 \n"
179 "add %%r14, %%r13 \n"
181 "movq 4096(%[kCoefficientsRgbY],%%r13,8), %%xmm1\n"
183 "mov %%r11, %%rax \n"
184 "lea (%%r11,%[source_dx]),%%r10\n"
186 "paddsw %%xmm1,%%xmm0\n"
188 "movzb (%[y_buf], %%r11, 1), %%r13 \n"
189 "movzb 1(%[y_buf], %%r11, 1), %%r14 \n"
190 "and $0xffff, %%rax \n"
191 "imul %%rax, %%r14 \n"
192 "xor $0xffff, %%rax \n"
193 "imul %%rax, %%r13 \n"
194 "add %%r14, %%r13 \n"
196 "movq (%[kCoefficientsRgbY],%%r13,8),%%xmm1\n"
198 "mov %%r10, %%rax \n"
199 "lea (%%r10,%[source_dx]),%%r11\n"
202 "movzb (%[y_buf],%%r10,1), %%r13 \n"
203 "movzb 1(%[y_buf],%%r10,1), %%r14 \n"
204 "and $0xffff, %%rax \n"
205 "imul %%rax, %%r14 \n"
206 "xor $0xffff, %%rax \n"
207 "imul %%rax, %%r13 \n"
208 "add %%r14, %%r13 \n"
210 "movq (%[kCoefficientsRgbY],%%r13,8),%%xmm2\n"
212 "paddsw %%xmm0,%%xmm1\n"
213 "paddsw %%xmm0,%%xmm2\n"
214 "shufps $0x44,%%xmm2,%%xmm1\n"
215 "psraw $0x6,%%xmm1\n"
216 "packuswb %%xmm1,%%xmm1\n"
217 "movq %%xmm1,0x0(%[rgb_buf])\n"
218 "add $0x8,%[rgb_buf]\n"
219 "sub $0x2,%[width]\n"
223 "add $0x1,%[width]\n"
229 "movzb (%[u_buf],%%r10,1), %%r13 \n"
230 "movq 2048(%[kCoefficientsRgbY],%%r13,8),%%xmm0\n"
232 "movzb (%[v_buf],%%r10,1), %%r13 \n"
233 "movq 4096(%[kCoefficientsRgbY],%%r13,8),%%xmm1\n"
235 "paddsw %%xmm1,%%xmm0\n"
238 "movzb (%[y_buf],%%r11,1), %%r13 \n"
239 "movq (%[kCoefficientsRgbY],%%r13,8),%%xmm1\n"
241 "paddsw %%xmm0,%%xmm1\n"
242 "psraw $0x6,%%xmm1\n"
243 "packuswb %%xmm1,%%xmm1\n"
244 "movd %%xmm1,0x0(%[rgb_buf])\n"
247 : [rgb_buf
] "+r"(rgb_buf
),
249 : [y_buf
] "r"(y_buf
),
252 [kCoefficientsRgbY
] "r" (kCoefficientsRgbY
),
253 [source_dx
] "r"(static_cast<long>(source_dx
))
254 : "cc", "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2"
258 #elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && !defined(__PIC__)
260 // PIC version is slower because fewer registers are available, so
261 // non-PIC is used on platforms where it is possible.
262 void FastConvertYUVToRGB32Row_SSE(const uint8_t* y_buf
,
263 const uint8_t* u_buf
,
264 const uint8_t* v_buf
,
269 ".global FastConvertYUVToRGB32Row_SSE\n"
270 ".type FastConvertYUVToRGB32Row_SSE, @function\n"
271 "FastConvertYUVToRGB32Row_SSE:\n"
273 "mov 0x24(%esp),%edx\n"
274 "mov 0x28(%esp),%edi\n"
275 "mov 0x2c(%esp),%esi\n"
276 "mov 0x30(%esp),%ebp\n"
277 "mov 0x34(%esp),%ecx\n"
281 "movzbl (%edi),%eax\n"
283 "movzbl (%esi),%ebx\n"
285 "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
286 "movzbl (%edx),%eax\n"
287 "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
288 "movzbl 0x1(%edx),%ebx\n"
289 "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
291 "movq kCoefficientsRgbY(,%ebx,8),%mm2\n"
296 "packuswb %mm2,%mm1\n"
297 "movntq %mm1,0x0(%ebp)\n"
306 "movzbl (%edi),%eax\n"
307 "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
308 "movzbl (%esi),%eax\n"
309 "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
310 "movzbl (%edx),%eax\n"
311 "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
314 "packuswb %mm1,%mm1\n"
315 "movd %mm1,0x0(%ebp)\n"
319 #if !defined(XP_MACOSX)
324 void FastConvertYUVToRGB32Row(const uint8_t* y_buf
,
325 const uint8_t* u_buf
,
326 const uint8_t* v_buf
,
330 if (mozilla::supports_sse()) {
331 FastConvertYUVToRGB32Row_SSE(y_buf
, u_buf
, v_buf
, rgb_buf
, width
);
335 FastConvertYUVToRGB32Row_C(y_buf
, u_buf
, v_buf
, rgb_buf
, width
, 1);
339 void ScaleYUVToRGB32Row_SSE(const uint8_t* y_buf
,
340 const uint8_t* u_buf
,
341 const uint8_t* v_buf
,
347 ".global ScaleYUVToRGB32Row_SSE\n"
348 ".type ScaleYUVToRGB32Row_SSE, @function\n"
349 "ScaleYUVToRGB32Row_SSE:\n"
351 "mov 0x24(%esp),%edx\n"
352 "mov 0x28(%esp),%edi\n"
353 "mov 0x2c(%esp),%esi\n"
354 "mov 0x30(%esp),%ebp\n"
355 "mov 0x34(%esp),%ecx\n"
362 "movzbl (%edi,%eax,1),%eax\n"
363 "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
366 "movzbl (%esi,%eax,1),%eax\n"
367 "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
369 "add 0x38(%esp),%ebx\n"
371 "movzbl (%edx,%eax,1),%eax\n"
372 "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
374 "add 0x38(%esp),%ebx\n"
376 "movzbl (%edx,%eax,1),%eax\n"
377 "movq kCoefficientsRgbY(,%eax,8),%mm2\n"
382 "packuswb %mm2,%mm1\n"
383 "movntq %mm1,0x0(%ebp)\n"
394 "movzbl (%edi,%eax,1),%eax\n"
395 "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
398 "movzbl (%esi,%eax,1),%eax\n"
399 "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
402 "movzbl (%edx,%eax,1),%eax\n"
403 "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
406 "packuswb %mm1,%mm1\n"
407 "movd %mm1,0x0(%ebp)\n"
412 #if !defined(XP_MACOSX)
417 void ScaleYUVToRGB32Row(const uint8_t* y_buf
,
418 const uint8_t* u_buf
,
419 const uint8_t* v_buf
,
424 if (mozilla::supports_sse()) {
425 ScaleYUVToRGB32Row_SSE(y_buf
, u_buf
, v_buf
, rgb_buf
,
430 ScaleYUVToRGB32Row_C(y_buf
, u_buf
, v_buf
, rgb_buf
,
434 void LinearScaleYUVToRGB32Row_SSE(const uint8_t* y_buf
,
435 const uint8_t* u_buf
,
436 const uint8_t* v_buf
,
442 ".global LinearScaleYUVToRGB32Row_SSE\n"
443 ".type LinearScaleYUVToRGB32Row_SSE, @function\n"
444 "LinearScaleYUVToRGB32Row_SSE:\n"
446 "mov 0x24(%esp),%edx\n"
447 "mov 0x28(%esp),%edi\n"
448 "mov 0x30(%esp),%ebp\n"
450 // source_width = width * source_dx + ebx
451 "mov 0x34(%esp), %ecx\n"
452 "imull 0x38(%esp), %ecx\n"
453 "mov %ecx, 0x34(%esp)\n"
455 "mov 0x38(%esp), %ecx\n"
456 "xor %ebx,%ebx\n" // x = 0
457 "cmp $0x20000,%ecx\n" // if source_dx >= 2.0
459 "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less
466 "movzbl (%edi,%eax,1),%ecx\n"
467 "movzbl 1(%edi,%eax,1),%esi\n"
469 "andl $0x1fffe, %eax \n"
471 "xorl $0x1fffe, %eax \n"
475 "movq kCoefficientsRgbY+2048(,%ecx,8),%mm0\n"
477 "mov 0x2c(%esp),%esi\n"
481 "movzbl (%esi,%eax,1),%ecx\n"
482 "movzbl 1(%esi,%eax,1),%esi\n"
484 "andl $0x1fffe, %eax \n"
486 "xorl $0x1fffe, %eax \n"
490 "paddsw kCoefficientsRgbY+4096(,%ecx,8),%mm0\n"
494 "movzbl (%edx,%eax,1),%ecx\n"
495 "movzbl 1(%edx,%eax,1),%esi\n"
497 "add 0x38(%esp),%ebx\n"
498 "andl $0xffff, %eax \n"
500 "xorl $0xffff, %eax \n"
504 "movq kCoefficientsRgbY(,%ecx,8),%mm1\n"
506 "cmp 0x34(%esp), %ebx\n"
511 "movzbl (%edx,%eax,1),%ecx\n"
512 "movzbl 1(%edx,%eax,1),%esi\n"
514 "add 0x38(%esp),%ebx\n"
515 "andl $0xffff, %eax \n"
517 "xorl $0xffff, %eax \n"
521 "movq kCoefficientsRgbY(,%ecx,8),%mm2\n"
527 "packuswb %mm2,%mm1\n"
528 "movntq %mm1,0x0(%ebp)\n"
532 "cmp 0x34(%esp), %ebx\n"
538 "paddsw %mm0, %mm1\n"
540 "packuswb %mm1, %mm1\n"
541 "movd %mm1, (%ebp)\n"
544 #if !defined(XP_MACOSX)
549 void LinearScaleYUVToRGB32Row(const uint8_t* y_buf
,
550 const uint8_t* u_buf
,
551 const uint8_t* v_buf
,
556 if (mozilla::supports_sse()) {
557 LinearScaleYUVToRGB32Row_SSE(y_buf
, u_buf
, v_buf
, rgb_buf
,
562 LinearScaleYUVToRGB32Row_C(y_buf
, u_buf
, v_buf
, rgb_buf
,
566 #elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && defined(__PIC__)
568 void PICConvertYUVToRGB32Row_SSE(const uint8_t* y_buf
,
569 const uint8_t* u_buf
,
570 const uint8_t* v_buf
,
573 const int16_t *kCoefficientsRgbY
);
577 #if defined(XP_MACOSX)
578 "_PICConvertYUVToRGB32Row_SSE:\n"
580 "PICConvertYUVToRGB32Row_SSE:\n"
583 "mov 0x24(%esp),%edx\n"
584 "mov 0x28(%esp),%edi\n"
585 "mov 0x2c(%esp),%esi\n"
586 "mov 0x30(%esp),%ebp\n"
587 "mov 0x38(%esp),%ecx\n"
592 "movzbl (%edi),%eax\n"
594 "movzbl (%esi),%ebx\n"
596 "movq 2048(%ecx,%eax,8),%mm0\n"
597 "movzbl (%edx),%eax\n"
598 "paddsw 4096(%ecx,%ebx,8),%mm0\n"
599 "movzbl 0x1(%edx),%ebx\n"
600 "movq 0(%ecx,%eax,8),%mm1\n"
602 "movq 0(%ecx,%ebx,8),%mm2\n"
607 "packuswb %mm2,%mm1\n"
608 "movntq %mm1,0x0(%ebp)\n"
611 "subl $0x2,0x34(%esp)\n"
614 "andl $0x1,0x34(%esp)\n"
617 "movzbl (%edi),%eax\n"
618 "movq 2048(%ecx,%eax,8),%mm0\n"
619 "movzbl (%esi),%eax\n"
620 "paddsw 4096(%ecx,%eax,8),%mm0\n"
621 "movzbl (%edx),%eax\n"
622 "movq 0(%ecx,%eax,8),%mm1\n"
625 "packuswb %mm1,%mm1\n"
626 "movd %mm1,0x0(%ebp)\n"
630 #if !defined(XP_MACOSX)
635 void FastConvertYUVToRGB32Row(const uint8_t* y_buf
,
636 const uint8_t* u_buf
,
637 const uint8_t* v_buf
,
641 if (mozilla::supports_sse()) {
642 PICConvertYUVToRGB32Row_SSE(y_buf
, u_buf
, v_buf
, rgb_buf
, width
,
643 &kCoefficientsRgbY
[0][0]);
647 FastConvertYUVToRGB32Row_C(y_buf
, u_buf
, v_buf
, rgb_buf
, width
, 1);
650 void PICScaleYUVToRGB32Row_SSE(const uint8_t* y_buf
,
651 const uint8_t* u_buf
,
652 const uint8_t* v_buf
,
656 const int16_t *kCoefficientsRgbY
);
660 #if defined(XP_MACOSX)
661 "_PICScaleYUVToRGB32Row_SSE:\n"
663 "PICScaleYUVToRGB32Row_SSE:\n"
666 "mov 0x24(%esp),%edx\n"
667 "mov 0x28(%esp),%edi\n"
668 "mov 0x2c(%esp),%esi\n"
669 "mov 0x30(%esp),%ebp\n"
670 "mov 0x3c(%esp),%ecx\n"
677 "movzbl (%edi,%eax,1),%eax\n"
678 "movq 2048(%ecx,%eax,8),%mm0\n"
681 "movzbl (%esi,%eax,1),%eax\n"
682 "paddsw 4096(%ecx,%eax,8),%mm0\n"
684 "add 0x38(%esp),%ebx\n"
686 "movzbl (%edx,%eax,1),%eax\n"
687 "movq 0(%ecx,%eax,8),%mm1\n"
689 "add 0x38(%esp),%ebx\n"
691 "movzbl (%edx,%eax,1),%eax\n"
692 "movq 0(%ecx,%eax,8),%mm2\n"
697 "packuswb %mm2,%mm1\n"
698 "movntq %mm1,0x0(%ebp)\n"
701 "subl $0x2,0x34(%esp)\n"
704 "andl $0x1,0x34(%esp)\n"
709 "movzbl (%edi,%eax,1),%eax\n"
710 "movq 2048(%ecx,%eax,8),%mm0\n"
713 "movzbl (%esi,%eax,1),%eax\n"
714 "paddsw 4096(%ecx,%eax,8),%mm0\n"
717 "movzbl (%edx,%eax,1),%eax\n"
718 "movq 0(%ecx,%eax,8),%mm1\n"
721 "packuswb %mm1,%mm1\n"
722 "movd %mm1,0x0(%ebp)\n"
727 #if !defined(XP_MACOSX)
732 void ScaleYUVToRGB32Row(const uint8_t* y_buf
,
733 const uint8_t* u_buf
,
734 const uint8_t* v_buf
,
739 if (mozilla::supports_sse()) {
740 PICScaleYUVToRGB32Row_SSE(y_buf
, u_buf
, v_buf
, rgb_buf
, width
, source_dx
,
741 &kCoefficientsRgbY
[0][0]);
745 ScaleYUVToRGB32Row_C(y_buf
, u_buf
, v_buf
, rgb_buf
, width
, source_dx
);
748 void PICLinearScaleYUVToRGB32Row_SSE(const uint8_t* y_buf
,
749 const uint8_t* u_buf
,
750 const uint8_t* v_buf
,
754 const int16_t *kCoefficientsRgbY
);
758 #if defined(XP_MACOSX)
759 "_PICLinearScaleYUVToRGB32Row_SSE:\n"
761 "PICLinearScaleYUVToRGB32Row_SSE:\n"
764 "mov 0x24(%esp),%edx\n"
765 "mov 0x30(%esp),%ebp\n"
766 "mov 0x34(%esp),%ecx\n"
767 "mov 0x3c(%esp),%edi\n"
770 // source_width = width * source_dx + ebx
771 "mov 0x34(%esp), %ecx\n"
772 "imull 0x38(%esp), %ecx\n"
773 "mov %ecx, 0x34(%esp)\n"
775 "mov 0x38(%esp), %ecx\n"
776 "xor %ebx,%ebx\n" // x = 0
777 "cmp $0x20000,%ecx\n" // if source_dx >= 2.0
779 "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less
783 "mov 0x28(%esp),%esi\n"
787 "movzbl (%esi,%eax,1),%ecx\n"
788 "movzbl 1(%esi,%eax,1),%esi\n"
790 "andl $0x1fffe, %eax \n"
792 "xorl $0x1fffe, %eax \n"
796 "movq 2048(%edi,%ecx,8),%mm0\n"
798 "mov 0x2c(%esp),%esi\n"
802 "movzbl (%esi,%eax,1),%ecx\n"
803 "movzbl 1(%esi,%eax,1),%esi\n"
805 "andl $0x1fffe, %eax \n"
807 "xorl $0x1fffe, %eax \n"
811 "paddsw 4096(%edi,%ecx,8),%mm0\n"
815 "movzbl (%edx,%eax,1),%ecx\n"
816 "movzbl 1(%edx,%eax,1),%esi\n"
818 "add 0x38(%esp),%ebx\n"
819 "andl $0xffff, %eax \n"
821 "xorl $0xffff, %eax \n"
825 "movq (%edi,%ecx,8),%mm1\n"
827 "cmp 0x34(%esp), %ebx\n"
832 "movzbl (%edx,%eax,1),%ecx\n"
833 "movzbl 1(%edx,%eax,1),%esi\n"
835 "add 0x38(%esp),%ebx\n"
836 "andl $0xffff, %eax \n"
838 "xorl $0xffff, %eax \n"
842 "movq (%edi,%ecx,8),%mm2\n"
848 "packuswb %mm2,%mm1\n"
849 "movntq %mm1,0x0(%ebp)\n"
853 "cmp %ebx, 0x34(%esp)\n"
859 "paddsw %mm0, %mm1\n"
861 "packuswb %mm1, %mm1\n"
862 "movd %mm1, (%ebp)\n"
865 #if !defined(XP_MACOSX)
871 void LinearScaleYUVToRGB32Row(const uint8_t* y_buf
,
872 const uint8_t* u_buf
,
873 const uint8_t* v_buf
,
878 if (mozilla::supports_sse()) {
879 PICLinearScaleYUVToRGB32Row_SSE(y_buf
, u_buf
, v_buf
, rgb_buf
, width
,
880 source_dx
, &kCoefficientsRgbY
[0][0]);
884 LinearScaleYUVToRGB32Row_C(y_buf
, u_buf
, v_buf
, rgb_buf
, width
, source_dx
);
887 void FastConvertYUVToRGB32Row(const uint8_t* y_buf
,
888 const uint8_t* u_buf
,
889 const uint8_t* v_buf
,
892 FastConvertYUVToRGB32Row_C(y_buf
, u_buf
, v_buf
, rgb_buf
, width
, 1);
895 void ScaleYUVToRGB32Row(const uint8_t* y_buf
,
896 const uint8_t* u_buf
,
897 const uint8_t* v_buf
,
901 ScaleYUVToRGB32Row_C(y_buf
, u_buf
, v_buf
, rgb_buf
, width
, source_dx
);
904 void LinearScaleYUVToRGB32Row(const uint8_t* y_buf
,
905 const uint8_t* u_buf
,
906 const uint8_t* v_buf
,
910 LinearScaleYUVToRGB32Row_C(y_buf
, u_buf
, v_buf
, rgb_buf
, width
, source_dx
);