/*
 * motion_comp_altivec.c
 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *
 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 * See http://libmpeg2.sourceforge.net/ for updates.
 *
 * mpeg2dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include "config.h"

#ifdef ARCH_PPC

#ifdef HAVE_ALTIVEC_H
#include <altivec.h>
#endif
#include <inttypes.h>

#include "mpeg2.h"
#include "attributes.h"
#include "mpeg2_internal.h"
typedef vector signed char vector_s8_t;
typedef vector unsigned char vector_u8_t;
typedef vector signed short vector_s16_t;
typedef vector unsigned short vector_u16_t;
typedef vector signed int vector_s32_t;
typedef vector unsigned int vector_u32_t;
#ifndef COFFEE_BREAK	/* Workarounds for gcc suckage */

static inline vector_u8_t my_vec_ld (int const A, const uint8_t * const B)
{
    return vec_ld (A, (uint8_t *)B);
}
#undef vec_ld
#define vec_ld my_vec_ld

static inline vector_u8_t my_vec_and (vector_u8_t const A, vector_u8_t const B)
{
    return vec_and (A, B);
}
#undef vec_and
#define vec_and my_vec_and

static inline vector_u8_t my_vec_avg (vector_u8_t const A, vector_u8_t const B)
{
    return vec_avg (A, B);
}
#undef vec_avg
#define vec_avg my_vec_avg

#endif
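/* Motion-compensation kernels.  Naming: MC_{put,avg}_{o,x,y,xy}_{16,8}.
 * "put" writes the prediction, "avg" averages it with what is already in
 * dest; o/x/y/xy select full-pel copy, horizontal, vertical or 2-D half-pel
 * interpolation; 16/8 is the block width in pixels.  Every kernel uses the
 * usual AltiVec unaligned-load idiom: vec_lvsl builds a permute mask from
 * the low bits of the ref address, two overlapping vec_ld cover the
 * potentially misaligned bytes, and vec_perm extracts them.  The loads for
 * the next row are issued before the store of the current one (software
 * pipelining), and height is halved because each loop iteration handles
 * two rows.
 */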
static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref,
				 const int stride, int height)
{
    vector_u8_t perm, ref0, ref1, tmp;

    perm = vec_lvsl (0, ref);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    tmp = vec_perm (ref0, ref1, perm);

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (15, ref);
	ref += stride;
	vec_st (tmp, 0, dest);
	tmp = vec_perm (ref0, ref1, perm);

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (15, ref);
	ref += stride;
	vec_st (tmp, stride, dest);
	dest += 2*stride;
	tmp = vec_perm (ref0, ref1, perm);
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    vec_st (tmp, 0, dest);
    tmp = vec_perm (ref0, ref1, perm);
    vec_st (tmp, stride, dest);
}
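/* The 8-pixel-wide kernels differ in two ways: the permute mask from
 * vec_lvsl is widened with vec_mergeh and narrowed back with vec_pack, so
 * the 8 result bytes are replicated into both halves of the vector and
 * separate masks (perm0/perm1) can track the ref alignment of even and odd
 * rows; and the result is written with two 4-byte vec_ste element stores
 * instead of a full vec_st, so only the 8 destination bytes are touched.
 */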
static void MC_put_o_8_altivec (uint8_t * dest, const uint8_t * ref,
				const int stride, int height)
{
    vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1;

    tmp0 = vec_lvsl (0, ref);
    tmp0 = vec_mergeh (tmp0, tmp0);
    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
    tmp1 = vec_lvsl (stride, ref);
    tmp1 = vec_mergeh (tmp1, tmp1);
    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    ref += stride;
    tmp0 = vec_perm (ref0, ref1, perm0);

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (7, ref);
	ref += stride;
	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
	dest += stride;
	tmp1 = vec_perm (ref0, ref1, perm1);

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (7, ref);
	ref += stride;
	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
	dest += stride;
	tmp0 = vec_perm (ref0, ref1, perm0);
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
    dest += stride;
    tmp1 = vec_perm (ref0, ref1, perm1);
    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
}
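/* Horizontal half-pel: permB = permA + 1 selects the byte one pixel to the
 * right, and vec_avg produces the rounded average (a + b + 1) >> 1 that
 * MPEG-2 specifies for half-sample prediction.
 */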
static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref,
				 const int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, tmp;

    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, vec_splat_u8 (1));

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    ref += stride;
    tmp = vec_avg (vec_perm (ref0, ref1, permA),
		   vec_perm (ref0, ref1, permB));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (16, ref);
	ref += stride;
	vec_st (tmp, 0, dest);
	tmp = vec_avg (vec_perm (ref0, ref1, permA),
		       vec_perm (ref0, ref1, permB));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (16, ref);
	ref += stride;
	vec_st (tmp, stride, dest);
	dest += 2*stride;
	tmp = vec_avg (vec_perm (ref0, ref1, permA),
		       vec_perm (ref0, ref1, permB));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    vec_st (tmp, 0, dest);
    tmp = vec_avg (vec_perm (ref0, ref1, permA),
		   vec_perm (ref0, ref1, permB));
    vec_st (tmp, stride, dest);
}
static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref,
				const int stride, int height)
{
    vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;

    ones = vec_splat_u8 (1);
    tmp0 = vec_lvsl (0, ref);
    tmp0 = vec_mergeh (tmp0, tmp0);
    perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
    perm0B = vec_add (perm0A, ones);
    tmp1 = vec_lvsl (stride, ref);
    tmp1 = vec_mergeh (tmp1, tmp1);
    perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
    perm1B = vec_add (perm1A, ones);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    ref += stride;
    tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
		    vec_perm (ref0, ref1, perm0B));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
	dest += stride;
	tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
			vec_perm (ref0, ref1, perm1B));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
	dest += stride;
	tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
			vec_perm (ref0, ref1, perm0B));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
    dest += stride;
    tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
		    vec_perm (ref0, ref1, perm1B));
    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
}
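/* Vertical half-pel: the rounded average of two vertically adjacent rows.
 * tmp0 and tmp1 alternate roles from row to row so each source row is
 * loaded and permuted only once.
 */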
static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref,
				 const int stride, int height)
{
    vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp;

    perm = vec_lvsl (0, ref);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    tmp0 = vec_perm (ref0, ref1, perm);
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    tmp1 = vec_perm (ref0, ref1, perm);
    tmp = vec_avg (tmp0, tmp1);

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (15, ref);
	ref += stride;
	vec_st (tmp, 0, dest);
	tmp0 = vec_perm (ref0, ref1, perm);
	tmp = vec_avg (tmp0, tmp1);

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (15, ref);
	ref += stride;
	vec_st (tmp, stride, dest);
	dest += 2*stride;
	tmp1 = vec_perm (ref0, ref1, perm);
	tmp = vec_avg (tmp0, tmp1);
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    vec_st (tmp, 0, dest);
    tmp0 = vec_perm (ref0, ref1, perm);
    tmp = vec_avg (tmp0, tmp1);
    vec_st (tmp, stride, dest);
}
static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref,
				const int stride, int height)
{
    vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1;

    tmp0 = vec_lvsl (0, ref);
    tmp0 = vec_mergeh (tmp0, tmp0);
    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
    tmp1 = vec_lvsl (stride, ref);
    tmp1 = vec_mergeh (tmp1, tmp1);
    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    ref += stride;
    tmp0 = vec_perm (ref0, ref1, perm0);
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    ref += stride;
    tmp1 = vec_perm (ref0, ref1, perm1);
    tmp = vec_avg (tmp0, tmp1);

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (7, ref);
	ref += stride;
	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
	dest += stride;
	tmp0 = vec_perm (ref0, ref1, perm0);
	tmp = vec_avg (tmp0, tmp1);

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (7, ref);
	ref += stride;
	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
	dest += stride;
	tmp1 = vec_perm (ref0, ref1, perm1);
	tmp = vec_avg (tmp0, tmp1);
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
    dest += stride;
    tmp0 = vec_perm (ref0, ref1, perm0);
    tmp = vec_avg (tmp0, tmp1);
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
}
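/* 2-D half-pel: the target is (A + B + C + D + 2) >> 2 for the four pixels
 * surrounding the half-sample position.  Cascading vec_avg as
 * avg (avg (A,B), avg (C,D)) can overshoot that value by one because each
 * vec_avg rounds up; the vec_sub term subtracts
 * (1 & (xor0 | xor1)) & (avg0 ^ avg1) to cancel exactly that bias.
 * Below is a scalar sketch of the same per-byte computation, an
 * illustration added for clarity (not part of the original file) and kept
 * disabled.
 */
#if 0
static void MC_put_xy_scalar (uint8_t * dest, const uint8_t * ref,
			      const int stride, int height, int width)
{
    int x, y;

    for (y = 0; y < height; y++) {
	for (x = 0; x < width; x++)
	    dest[x] = (ref[x] + ref[x+1] +
		       ref[x+stride] + ref[x+stride+1] + 2) >> 2;
	dest += stride;
	ref += stride;
    }
}
#endif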
static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
				  const int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
    vector_u8_t ones;

    ones = vec_splat_u8 (1);
    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, ones);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    ref += stride;
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    ref += stride;
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg1 = vec_avg (A, B);
    xor1 = vec_xor (A, B);
    tmp = vec_sub (vec_avg (avg0, avg1),
		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
			    vec_xor (avg0, avg1)));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (16, ref);
	ref += stride;
	vec_st (tmp, 0, dest);
	A = vec_perm (ref0, ref1, permA);
	B = vec_perm (ref0, ref1, permB);
	avg0 = vec_avg (A, B);
	xor0 = vec_xor (A, B);
	tmp = vec_sub (vec_avg (avg0, avg1),
		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
				vec_xor (avg0, avg1)));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (16, ref);
	ref += stride;
	vec_st (tmp, stride, dest);
	dest += 2*stride;
	A = vec_perm (ref0, ref1, permA);
	B = vec_perm (ref0, ref1, permB);
	avg1 = vec_avg (A, B);
	xor1 = vec_xor (A, B);
	tmp = vec_sub (vec_avg (avg0, avg1),
		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
				vec_xor (avg0, avg1)));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    vec_st (tmp, 0, dest);
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);
    tmp = vec_sub (vec_avg (avg0, avg1),
		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
			    vec_xor (avg0, avg1)));
    vec_st (tmp, stride, dest);
}
static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
				 const int stride, int height)
{
    vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
    vector_u8_t avg0, avg1, xor0, xor1, tmp, ones;

    ones = vec_splat_u8 (1);
    perm0A = vec_lvsl (0, ref);
    perm0A = vec_mergeh (perm0A, perm0A);
    perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
    perm0B = vec_add (perm0A, ones);
    perm1A = vec_lvsl (stride, ref);
    perm1A = vec_mergeh (perm1A, perm1A);
    perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
    perm1B = vec_add (perm1A, ones);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    ref += stride;
    A = vec_perm (ref0, ref1, perm0A);
    B = vec_perm (ref0, ref1, perm0B);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    ref += stride;
    A = vec_perm (ref0, ref1, perm1A);
    B = vec_perm (ref0, ref1, perm1B);
    avg1 = vec_avg (A, B);
    xor1 = vec_xor (A, B);
    tmp = vec_sub (vec_avg (avg0, avg1),
		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
			    vec_xor (avg0, avg1)));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
	dest += stride;
	A = vec_perm (ref0, ref1, perm0A);
	B = vec_perm (ref0, ref1, perm0B);
	avg0 = vec_avg (A, B);
	xor0 = vec_xor (A, B);
	tmp = vec_sub (vec_avg (avg0, avg1),
		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
				vec_xor (avg0, avg1)));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
	dest += stride;
	A = vec_perm (ref0, ref1, perm1A);
	B = vec_perm (ref0, ref1, perm1B);
	avg1 = vec_avg (A, B);
	xor1 = vec_xor (A, B);
	tmp = vec_sub (vec_avg (avg0, avg1),
		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
				vec_xor (avg0, avg1)));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
    dest += stride;
    A = vec_perm (ref0, ref1, perm0A);
    B = vec_perm (ref0, ref1, perm0B);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);
    tmp = vec_sub (vec_avg (avg0, avg1),
		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
			    vec_xor (avg0, avg1)));
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
}
#if 0
/* Alternative MC_put_xy_8 using widening adds and explicit rounding.  It
   would redefine MC_put_xy_8_altivec above, so it is left disabled here;
   it appears to have been dead code in the original.  The loop structure
   is a reconstruction. */
static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
				 const int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones;
    vector_u16_t splat2, temp;

    ones = vec_splat_u8 (1);
    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, ones);

    zero = vec_splat_u8 (0);
    splat2 = vec_splat_u16 (2);

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	A = vec_perm (ref0, ref1, permA);
	B = vec_perm (ref0, ref1, permB);
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	C = vec_perm (ref0, ref1, permA);
	D = vec_perm (ref0, ref1, permB);

	temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A),
				 (vector_u16_t)vec_mergeh (zero, B)),
			vec_add ((vector_u16_t)vec_mergeh (zero, C),
				 (vector_u16_t)vec_mergeh (zero, D)));
	temp = vec_sr (vec_add (temp, splat2), splat2);
	tmp = vec_pack (temp, temp);

	vec_st (tmp, 0, dest);
	dest += stride;
	tmp = vec_avg (vec_perm (ref0, ref1, permA),
		       vec_perm (ref0, ref1, permB));
    } while (--height);
}
#endif
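/* The MC_avg_* kernels compute the same predictions as their MC_put_*
 * counterparts and then vec_avg the result with the pixels already in dest
 * ("prev"), as needed for bidirectional prediction.  prev for the next row
 * is loaded (at offset stride or 2*stride) before the current row is
 * stored, preserving the load-ahead pipeline.
 */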
static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref,
				 const int stride, int height)
{
    vector_u8_t perm, ref0, ref1, tmp, prev;

    perm = vec_lvsl (0, ref);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    prev = vec_ld (0, dest);
    tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (15, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_st (tmp, 0, dest);
	tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (15, ref);
	ref += stride;
	prev = vec_ld (2*stride, dest);
	vec_st (tmp, stride, dest);
	dest += 2*stride;
	tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    prev = vec_ld (stride, dest);
    vec_st (tmp, 0, dest);
    tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
    vec_st (tmp, stride, dest);
}
static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref,
				const int stride, int height)
{
    vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev;

    tmp0 = vec_lvsl (0, ref);
    tmp0 = vec_mergeh (tmp0, tmp0);
    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
    tmp1 = vec_lvsl (stride, ref);
    tmp1 = vec_mergeh (tmp1, tmp1);
    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    ref += stride;
    prev = vec_ld (0, dest);
    tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (7, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
	dest += stride;
	tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (7, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
	dest += stride;
	tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    prev = vec_ld (stride, dest);
    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
    dest += stride;
    tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
}
static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref,
				 const int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, tmp, prev;

    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, vec_splat_u8 (1));

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    prev = vec_ld (0, dest);
    ref += stride;
    tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
				  vec_perm (ref0, ref1, permB)));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (16, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_st (tmp, 0, dest);
	tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
				      vec_perm (ref0, ref1, permB)));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (16, ref);
	ref += stride;
	prev = vec_ld (2*stride, dest);
	vec_st (tmp, stride, dest);
	dest += 2*stride;
	tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
				      vec_perm (ref0, ref1, permB)));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    prev = vec_ld (stride, dest);
    vec_st (tmp, 0, dest);
    tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
				  vec_perm (ref0, ref1, permB)));
    vec_st (tmp, stride, dest);
}
static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref,
				const int stride, int height)
{
    vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
    vector_u8_t prev;

    ones = vec_splat_u8 (1);
    tmp0 = vec_lvsl (0, ref);
    tmp0 = vec_mergeh (tmp0, tmp0);
    perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
    perm0B = vec_add (perm0A, ones);
    tmp1 = vec_lvsl (stride, ref);
    tmp1 = vec_mergeh (tmp1, tmp1);
    perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
    perm1B = vec_add (perm1A, ones);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    prev = vec_ld (0, dest);
    ref += stride;
    tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
				   vec_perm (ref0, ref1, perm0B)));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
	dest += stride;
	tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
				       vec_perm (ref0, ref1, perm1B)));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
	dest += stride;
	tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
				       vec_perm (ref0, ref1, perm0B)));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    prev = vec_ld (stride, dest);
    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
    dest += stride;
    tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
				   vec_perm (ref0, ref1, perm1B)));
    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
}
static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref,
				 const int stride, int height)
{
    vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev;

    perm = vec_lvsl (0, ref);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    tmp0 = vec_perm (ref0, ref1, perm);
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    prev = vec_ld (0, dest);
    tmp1 = vec_perm (ref0, ref1, perm);
    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (15, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_st (tmp, 0, dest);
	tmp0 = vec_perm (ref0, ref1, perm);
	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (15, ref);
	ref += stride;
	prev = vec_ld (2*stride, dest);
	vec_st (tmp, stride, dest);
	dest += 2*stride;
	tmp1 = vec_perm (ref0, ref1, perm);
	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    prev = vec_ld (stride, dest);
    vec_st (tmp, 0, dest);
    tmp0 = vec_perm (ref0, ref1, perm);
    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
    vec_st (tmp, stride, dest);
}
static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref,
				const int stride, int height)
{
    vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev;

    tmp0 = vec_lvsl (0, ref);
    tmp0 = vec_mergeh (tmp0, tmp0);
    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
    tmp1 = vec_lvsl (stride, ref);
    tmp1 = vec_mergeh (tmp1, tmp1);
    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    ref += stride;
    tmp0 = vec_perm (ref0, ref1, perm0);
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    ref += stride;
    prev = vec_ld (0, dest);
    tmp1 = vec_perm (ref0, ref1, perm1);
    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (7, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
	dest += stride;
	tmp0 = vec_perm (ref0, ref1, perm0);
	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (7, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
	dest += stride;
	tmp1 = vec_perm (ref0, ref1, perm1);
	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    prev = vec_ld (stride, dest);
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
    dest += stride;
    tmp0 = vec_perm (ref0, ref1, perm0);
    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
}
static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
				  const int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
    vector_u8_t ones, prev;

    ones = vec_splat_u8 (1);
    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, ones);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    ref += stride;
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    ref += stride;
    prev = vec_ld (0, dest);
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg1 = vec_avg (A, B);
    xor1 = vec_xor (A, B);
    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
					   vec_xor (avg0, avg1))));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (16, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_st (tmp, 0, dest);
	A = vec_perm (ref0, ref1, permA);
	B = vec_perm (ref0, ref1, permB);
	avg0 = vec_avg (A, B);
	xor0 = vec_xor (A, B);
	tmp = vec_avg (prev,
		       vec_sub (vec_avg (avg0, avg1),
				vec_and (vec_and (ones, vec_or (xor0, xor1)),
					 vec_xor (avg0, avg1))));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (16, ref);
	ref += stride;
	prev = vec_ld (2*stride, dest);
	vec_st (tmp, stride, dest);
	dest += 2*stride;
	A = vec_perm (ref0, ref1, permA);
	B = vec_perm (ref0, ref1, permB);
	avg1 = vec_avg (A, B);
	xor1 = vec_xor (A, B);
	tmp = vec_avg (prev,
		       vec_sub (vec_avg (avg0, avg1),
				vec_and (vec_and (ones, vec_or (xor0, xor1)),
					 vec_xor (avg0, avg1))));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    prev = vec_ld (stride, dest);
    vec_st (tmp, 0, dest);
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);
    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
					   vec_xor (avg0, avg1))));
    vec_st (tmp, stride, dest);
}
static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
				 const int stride, int height)
{
    vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
    vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev;

    ones = vec_splat_u8 (1);
    perm0A = vec_lvsl (0, ref);
    perm0A = vec_mergeh (perm0A, perm0A);
    perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
    perm0B = vec_add (perm0A, ones);
    perm1A = vec_lvsl (stride, ref);
    perm1A = vec_mergeh (perm1A, perm1A);
    perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
    perm1B = vec_add (perm1A, ones);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    ref += stride;
    A = vec_perm (ref0, ref1, perm0A);
    B = vec_perm (ref0, ref1, perm0B);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    ref += stride;
    prev = vec_ld (0, dest);
    A = vec_perm (ref0, ref1, perm1A);
    B = vec_perm (ref0, ref1, perm1B);
    avg1 = vec_avg (A, B);
    xor1 = vec_xor (A, B);
    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
					   vec_xor (avg0, avg1))));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
	dest += stride;
	A = vec_perm (ref0, ref1, perm0A);
	B = vec_perm (ref0, ref1, perm0B);
	avg0 = vec_avg (A, B);
	xor0 = vec_xor (A, B);
	tmp = vec_avg (prev,
		       vec_sub (vec_avg (avg0, avg1),
				vec_and (vec_and (ones, vec_or (xor0, xor1)),
					 vec_xor (avg0, avg1))));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
	dest += stride;
	A = vec_perm (ref0, ref1, perm1A);
	B = vec_perm (ref0, ref1, perm1B);
	avg1 = vec_avg (A, B);
	xor1 = vec_xor (A, B);
	tmp = vec_avg (prev,
		       vec_sub (vec_avg (avg0, avg1),
				vec_and (vec_and (ones, vec_or (xor0, xor1)),
					 vec_xor (avg0, avg1))));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    prev = vec_ld (stride, dest);
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
    dest += stride;
    A = vec_perm (ref0, ref1, perm0A);
    B = vec_perm (ref0, ref1, perm0B);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);
    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
					   vec_xor (avg0, avg1))));
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
}
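/* MPEG2_MC_EXTERN (defined in mpeg2_internal.h) expands to the mpeg2_mc_t
 * dispatch table for this architecture, wiring the sixteen kernels above
 * into the decoder. */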
MPEG2_MC_EXTERN (altivec)

#endif	/* ARCH_PPC */