VP6F has to be flipped for binary decoder.
[mplayer/glamo.git] / libmpeg2 / motion_comp_altivec.c
blobf98aaedc90d2a0332835aacd2357334c6f550858
/*
 * motion_comp_altivec.c
 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *
 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 * See http://libmpeg2.sourceforge.net/ for updates.
 *
 * mpeg2dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
24 #include "config.h"
26 #if ARCH_PPC
28 #ifdef HAVE_ALTIVEC_H
29 #include <altivec.h>
30 #endif
31 #include <inttypes.h>
33 #include "mpeg2.h"
34 #include "attributes.h"
35 #include "mpeg2_internal.h"
37 typedef vector signed char vector_s8_t;
38 typedef vector unsigned char vector_u8_t;
39 typedef vector signed short vector_s16_t;
40 typedef vector unsigned short vector_u16_t;
41 typedef vector signed int vector_s32_t;
42 typedef vector unsigned int vector_u32_t;
44 #ifndef COFFEE_BREAK /* Workarounds for gcc suckage */
46 static inline vector_u8_t my_vec_ld (int const A, const uint8_t * const B)
48 return vec_ld (A, (uint8_t *)B);
50 #undef vec_ld
51 #define vec_ld my_vec_ld
53 static inline vector_u8_t my_vec_and (vector_u8_t const A, vector_u8_t const B)
55 return vec_and (A, B);
57 #undef vec_and
58 #define vec_and my_vec_and
60 static inline vector_u8_t my_vec_avg (vector_u8_t const A, vector_u8_t const B)
62 return vec_avg (A, B);
64 #undef vec_avg
65 #define vec_avg my_vec_avg
67 #endif
69 static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref,
70 const int stride, int height)
72 vector_u8_t perm, ref0, ref1, tmp;
74 perm = vec_lvsl (0, ref);
76 height = (height >> 1) - 1;
78 ref0 = vec_ld (0, ref);
79 ref1 = vec_ld (15, ref);
80 ref += stride;
81 tmp = vec_perm (ref0, ref1, perm);
83 do {
84 ref0 = vec_ld (0, ref);
85 ref1 = vec_ld (15, ref);
86 ref += stride;
87 vec_st (tmp, 0, dest);
88 tmp = vec_perm (ref0, ref1, perm);
90 ref0 = vec_ld (0, ref);
91 ref1 = vec_ld (15, ref);
92 ref += stride;
93 vec_st (tmp, stride, dest);
94 dest += 2*stride;
95 tmp = vec_perm (ref0, ref1, perm);
96 } while (--height);
98 ref0 = vec_ld (0, ref);
99 ref1 = vec_ld (15, ref);
100 vec_st (tmp, 0, dest);
101 tmp = vec_perm (ref0, ref1, perm);
102 vec_st (tmp, stride, dest);
105 static void MC_put_o_8_altivec (uint8_t * dest, const uint8_t * ref,
106 const int stride, int height)
108 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1;
110 tmp0 = vec_lvsl (0, ref);
111 tmp0 = vec_mergeh (tmp0, tmp0);
112 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
113 tmp1 = vec_lvsl (stride, ref);
114 tmp1 = vec_mergeh (tmp1, tmp1);
115 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
117 height = (height >> 1) - 1;
119 ref0 = vec_ld (0, ref);
120 ref1 = vec_ld (7, ref);
121 ref += stride;
122 tmp0 = vec_perm (ref0, ref1, perm0);
124 do {
125 ref0 = vec_ld (0, ref);
126 ref1 = vec_ld (7, ref);
127 ref += stride;
128 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
129 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
130 dest += stride;
131 tmp1 = vec_perm (ref0, ref1, perm1);
133 ref0 = vec_ld (0, ref);
134 ref1 = vec_ld (7, ref);
135 ref += stride;
136 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
137 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
138 dest += stride;
139 tmp0 = vec_perm (ref0, ref1, perm0);
140 } while (--height);
142 ref0 = vec_ld (0, ref);
143 ref1 = vec_ld (7, ref);
144 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
145 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
146 dest += stride;
147 tmp1 = vec_perm (ref0, ref1, perm1);
148 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
149 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
152 static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref,
153 const int stride, int height)
155 vector_u8_t permA, permB, ref0, ref1, tmp;
157 permA = vec_lvsl (0, ref);
158 permB = vec_add (permA, vec_splat_u8 (1));
160 height = (height >> 1) - 1;
162 ref0 = vec_ld (0, ref);
163 ref1 = vec_ld (16, ref);
164 ref += stride;
165 tmp = vec_avg (vec_perm (ref0, ref1, permA),
166 vec_perm (ref0, ref1, permB));
168 do {
169 ref0 = vec_ld (0, ref);
170 ref1 = vec_ld (16, ref);
171 ref += stride;
172 vec_st (tmp, 0, dest);
173 tmp = vec_avg (vec_perm (ref0, ref1, permA),
174 vec_perm (ref0, ref1, permB));
176 ref0 = vec_ld (0, ref);
177 ref1 = vec_ld (16, ref);
178 ref += stride;
179 vec_st (tmp, stride, dest);
180 dest += 2*stride;
181 tmp = vec_avg (vec_perm (ref0, ref1, permA),
182 vec_perm (ref0, ref1, permB));
183 } while (--height);
185 ref0 = vec_ld (0, ref);
186 ref1 = vec_ld (16, ref);
187 vec_st (tmp, 0, dest);
188 tmp = vec_avg (vec_perm (ref0, ref1, permA),
189 vec_perm (ref0, ref1, permB));
190 vec_st (tmp, stride, dest);
193 static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref,
194 const int stride, int height)
196 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
198 ones = vec_splat_u8 (1);
199 tmp0 = vec_lvsl (0, ref);
200 tmp0 = vec_mergeh (tmp0, tmp0);
201 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
202 perm0B = vec_add (perm0A, ones);
203 tmp1 = vec_lvsl (stride, ref);
204 tmp1 = vec_mergeh (tmp1, tmp1);
205 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
206 perm1B = vec_add (perm1A, ones);
208 height = (height >> 1) - 1;
210 ref0 = vec_ld (0, ref);
211 ref1 = vec_ld (8, ref);
212 ref += stride;
213 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
214 vec_perm (ref0, ref1, perm0B));
216 do {
217 ref0 = vec_ld (0, ref);
218 ref1 = vec_ld (8, ref);
219 ref += stride;
220 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
221 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
222 dest += stride;
223 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
224 vec_perm (ref0, ref1, perm1B));
226 ref0 = vec_ld (0, ref);
227 ref1 = vec_ld (8, ref);
228 ref += stride;
229 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
230 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
231 dest += stride;
232 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
233 vec_perm (ref0, ref1, perm0B));
234 } while (--height);
236 ref0 = vec_ld (0, ref);
237 ref1 = vec_ld (8, ref);
238 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
239 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
240 dest += stride;
241 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
242 vec_perm (ref0, ref1, perm1B));
243 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
244 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
247 static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref,
248 const int stride, int height)
250 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp;
252 perm = vec_lvsl (0, ref);
254 height = (height >> 1) - 1;
256 ref0 = vec_ld (0, ref);
257 ref1 = vec_ld (15, ref);
258 ref += stride;
259 tmp0 = vec_perm (ref0, ref1, perm);
260 ref0 = vec_ld (0, ref);
261 ref1 = vec_ld (15, ref);
262 ref += stride;
263 tmp1 = vec_perm (ref0, ref1, perm);
264 tmp = vec_avg (tmp0, tmp1);
266 do {
267 ref0 = vec_ld (0, ref);
268 ref1 = vec_ld (15, ref);
269 ref += stride;
270 vec_st (tmp, 0, dest);
271 tmp0 = vec_perm (ref0, ref1, perm);
272 tmp = vec_avg (tmp0, tmp1);
274 ref0 = vec_ld (0, ref);
275 ref1 = vec_ld (15, ref);
276 ref += stride;
277 vec_st (tmp, stride, dest);
278 dest += 2*stride;
279 tmp1 = vec_perm (ref0, ref1, perm);
280 tmp = vec_avg (tmp0, tmp1);
281 } while (--height);
283 ref0 = vec_ld (0, ref);
284 ref1 = vec_ld (15, ref);
285 vec_st (tmp, 0, dest);
286 tmp0 = vec_perm (ref0, ref1, perm);
287 tmp = vec_avg (tmp0, tmp1);
288 vec_st (tmp, stride, dest);
291 static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref,
292 const int stride, int height)
294 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1;
296 tmp0 = vec_lvsl (0, ref);
297 tmp0 = vec_mergeh (tmp0, tmp0);
298 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
299 tmp1 = vec_lvsl (stride, ref);
300 tmp1 = vec_mergeh (tmp1, tmp1);
301 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
303 height = (height >> 1) - 1;
305 ref0 = vec_ld (0, ref);
306 ref1 = vec_ld (7, ref);
307 ref += stride;
308 tmp0 = vec_perm (ref0, ref1, perm0);
309 ref0 = vec_ld (0, ref);
310 ref1 = vec_ld (7, ref);
311 ref += stride;
312 tmp1 = vec_perm (ref0, ref1, perm1);
313 tmp = vec_avg (tmp0, tmp1);
315 do {
316 ref0 = vec_ld (0, ref);
317 ref1 = vec_ld (7, ref);
318 ref += stride;
319 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
320 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
321 dest += stride;
322 tmp0 = vec_perm (ref0, ref1, perm0);
323 tmp = vec_avg (tmp0, tmp1);
325 ref0 = vec_ld (0, ref);
326 ref1 = vec_ld (7, ref);
327 ref += stride;
328 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
329 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
330 dest += stride;
331 tmp1 = vec_perm (ref0, ref1, perm1);
332 tmp = vec_avg (tmp0, tmp1);
333 } while (--height);
335 ref0 = vec_ld (0, ref);
336 ref1 = vec_ld (7, ref);
337 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
338 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
339 dest += stride;
340 tmp0 = vec_perm (ref0, ref1, perm0);
341 tmp = vec_avg (tmp0, tmp1);
342 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
343 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
346 static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
347 const int stride, int height)
349 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
350 vector_u8_t ones;
352 ones = vec_splat_u8 (1);
353 permA = vec_lvsl (0, ref);
354 permB = vec_add (permA, ones);
356 height = (height >> 1) - 1;
358 ref0 = vec_ld (0, ref);
359 ref1 = vec_ld (16, ref);
360 ref += stride;
361 A = vec_perm (ref0, ref1, permA);
362 B = vec_perm (ref0, ref1, permB);
363 avg0 = vec_avg (A, B);
364 xor0 = vec_xor (A, B);
366 ref0 = vec_ld (0, ref);
367 ref1 = vec_ld (16, ref);
368 ref += stride;
369 A = vec_perm (ref0, ref1, permA);
370 B = vec_perm (ref0, ref1, permB);
371 avg1 = vec_avg (A, B);
372 xor1 = vec_xor (A, B);
373 tmp = vec_sub (vec_avg (avg0, avg1),
374 vec_and (vec_and (ones, vec_or (xor0, xor1)),
375 vec_xor (avg0, avg1)));
377 do {
378 ref0 = vec_ld (0, ref);
379 ref1 = vec_ld (16, ref);
380 ref += stride;
381 vec_st (tmp, 0, dest);
382 A = vec_perm (ref0, ref1, permA);
383 B = vec_perm (ref0, ref1, permB);
384 avg0 = vec_avg (A, B);
385 xor0 = vec_xor (A, B);
386 tmp = vec_sub (vec_avg (avg0, avg1),
387 vec_and (vec_and (ones, vec_or (xor0, xor1)),
388 vec_xor (avg0, avg1)));
390 ref0 = vec_ld (0, ref);
391 ref1 = vec_ld (16, ref);
392 ref += stride;
393 vec_st (tmp, stride, dest);
394 dest += 2*stride;
395 A = vec_perm (ref0, ref1, permA);
396 B = vec_perm (ref0, ref1, permB);
397 avg1 = vec_avg (A, B);
398 xor1 = vec_xor (A, B);
399 tmp = vec_sub (vec_avg (avg0, avg1),
400 vec_and (vec_and (ones, vec_or (xor0, xor1)),
401 vec_xor (avg0, avg1)));
402 } while (--height);
404 ref0 = vec_ld (0, ref);
405 ref1 = vec_ld (16, ref);
406 vec_st (tmp, 0, dest);
407 A = vec_perm (ref0, ref1, permA);
408 B = vec_perm (ref0, ref1, permB);
409 avg0 = vec_avg (A, B);
410 xor0 = vec_xor (A, B);
411 tmp = vec_sub (vec_avg (avg0, avg1),
412 vec_and (vec_and (ones, vec_or (xor0, xor1)),
413 vec_xor (avg0, avg1)));
414 vec_st (tmp, stride, dest);
417 static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
418 const int stride, int height)
420 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
421 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones;
423 ones = vec_splat_u8 (1);
424 perm0A = vec_lvsl (0, ref);
425 perm0A = vec_mergeh (perm0A, perm0A);
426 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
427 perm0B = vec_add (perm0A, ones);
428 perm1A = vec_lvsl (stride, ref);
429 perm1A = vec_mergeh (perm1A, perm1A);
430 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
431 perm1B = vec_add (perm1A, ones);
433 height = (height >> 1) - 1;
435 ref0 = vec_ld (0, ref);
436 ref1 = vec_ld (8, ref);
437 ref += stride;
438 A = vec_perm (ref0, ref1, perm0A);
439 B = vec_perm (ref0, ref1, perm0B);
440 avg0 = vec_avg (A, B);
441 xor0 = vec_xor (A, B);
443 ref0 = vec_ld (0, ref);
444 ref1 = vec_ld (8, ref);
445 ref += stride;
446 A = vec_perm (ref0, ref1, perm1A);
447 B = vec_perm (ref0, ref1, perm1B);
448 avg1 = vec_avg (A, B);
449 xor1 = vec_xor (A, B);
450 tmp = vec_sub (vec_avg (avg0, avg1),
451 vec_and (vec_and (ones, vec_or (xor0, xor1)),
452 vec_xor (avg0, avg1)));
454 do {
455 ref0 = vec_ld (0, ref);
456 ref1 = vec_ld (8, ref);
457 ref += stride;
458 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
459 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
460 dest += stride;
461 A = vec_perm (ref0, ref1, perm0A);
462 B = vec_perm (ref0, ref1, perm0B);
463 avg0 = vec_avg (A, B);
464 xor0 = vec_xor (A, B);
465 tmp = vec_sub (vec_avg (avg0, avg1),
466 vec_and (vec_and (ones, vec_or (xor0, xor1)),
467 vec_xor (avg0, avg1)));
469 ref0 = vec_ld (0, ref);
470 ref1 = vec_ld (8, ref);
471 ref += stride;
472 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
473 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
474 dest += stride;
475 A = vec_perm (ref0, ref1, perm1A);
476 B = vec_perm (ref0, ref1, perm1B);
477 avg1 = vec_avg (A, B);
478 xor1 = vec_xor (A, B);
479 tmp = vec_sub (vec_avg (avg0, avg1),
480 vec_and (vec_and (ones, vec_or (xor0, xor1)),
481 vec_xor (avg0, avg1)));
482 } while (--height);
484 ref0 = vec_ld (0, ref);
485 ref1 = vec_ld (8, ref);
486 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
487 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
488 dest += stride;
489 A = vec_perm (ref0, ref1, perm0A);
490 B = vec_perm (ref0, ref1, perm0B);
491 avg0 = vec_avg (A, B);
492 xor0 = vec_xor (A, B);
493 tmp = vec_sub (vec_avg (avg0, avg1),
494 vec_and (vec_and (ones, vec_or (xor0, xor1)),
495 vec_xor (avg0, avg1)));
496 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
497 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
#if 0
/*
 * Disabled alternative for the 8-wide diagonal half-pel case; kept for
 * reference only and never compiled.  It widens the four neighbours to
 * 16 bits and evaluates (A + B + C + D + 2) >> 2 directly.
 *
 * NOTE(review): as written this is not a drop-in replacement for the
 * live MC_put_xy_8_altivec:
 *   - it stores a full 16-byte vector with vec_st for every row;
 *   - a single permute pair is used for all rows;
 *   - the vec_avg assigned to tmp at the end of the loop body is
 *     overwritten by vec_pack before it is ever stored.
 */
static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
                                 const int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones;
    vector_u16_t splat2, temp;

    ones = vec_splat_u8 (1);
    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, ones);

    zero = vec_splat_u8 (0);
    splat2 = vec_splat_u16 (2);

    do {
        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (8, ref);
        ref += stride;
        A = vec_perm (ref0, ref1, permA);
        B = vec_perm (ref0, ref1, permB);
        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (8, ref);
        C = vec_perm (ref0, ref1, permA);
        D = vec_perm (ref0, ref1, permB);

        temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A),
                                 (vector_u16_t)vec_mergeh (zero, B)),
                        vec_add ((vector_u16_t)vec_mergeh (zero, C),
                                 (vector_u16_t)vec_mergeh (zero, D)));
        temp = vec_sr (vec_add (temp, splat2), splat2);
        tmp = vec_pack (temp, temp);

        vec_st (tmp, 0, dest);
        dest += stride;
        tmp = vec_avg (vec_perm (ref0, ref1, permA),
                       vec_perm (ref0, ref1, permB));
    } while (--height);
}
#endif
540 static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref,
541 const int stride, int height)
543 vector_u8_t perm, ref0, ref1, tmp, prev;
545 perm = vec_lvsl (0, ref);
547 height = (height >> 1) - 1;
549 ref0 = vec_ld (0, ref);
550 ref1 = vec_ld (15, ref);
551 ref += stride;
552 prev = vec_ld (0, dest);
553 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
555 do {
556 ref0 = vec_ld (0, ref);
557 ref1 = vec_ld (15, ref);
558 ref += stride;
559 prev = vec_ld (stride, dest);
560 vec_st (tmp, 0, dest);
561 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
563 ref0 = vec_ld (0, ref);
564 ref1 = vec_ld (15, ref);
565 ref += stride;
566 prev = vec_ld (2*stride, dest);
567 vec_st (tmp, stride, dest);
568 dest += 2*stride;
569 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
570 } while (--height);
572 ref0 = vec_ld (0, ref);
573 ref1 = vec_ld (15, ref);
574 prev = vec_ld (stride, dest);
575 vec_st (tmp, 0, dest);
576 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
577 vec_st (tmp, stride, dest);
580 static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref,
581 const int stride, int height)
583 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev;
585 tmp0 = vec_lvsl (0, ref);
586 tmp0 = vec_mergeh (tmp0, tmp0);
587 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
588 tmp1 = vec_lvsl (stride, ref);
589 tmp1 = vec_mergeh (tmp1, tmp1);
590 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
592 height = (height >> 1) - 1;
594 ref0 = vec_ld (0, ref);
595 ref1 = vec_ld (7, ref);
596 ref += stride;
597 prev = vec_ld (0, dest);
598 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
600 do {
601 ref0 = vec_ld (0, ref);
602 ref1 = vec_ld (7, ref);
603 ref += stride;
604 prev = vec_ld (stride, dest);
605 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
606 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
607 dest += stride;
608 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
610 ref0 = vec_ld (0, ref);
611 ref1 = vec_ld (7, ref);
612 ref += stride;
613 prev = vec_ld (stride, dest);
614 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
615 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
616 dest += stride;
617 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
618 } while (--height);
620 ref0 = vec_ld (0, ref);
621 ref1 = vec_ld (7, ref);
622 prev = vec_ld (stride, dest);
623 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
624 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
625 dest += stride;
626 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
627 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
628 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
631 static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref,
632 const int stride, int height)
634 vector_u8_t permA, permB, ref0, ref1, tmp, prev;
636 permA = vec_lvsl (0, ref);
637 permB = vec_add (permA, vec_splat_u8 (1));
639 height = (height >> 1) - 1;
641 ref0 = vec_ld (0, ref);
642 ref1 = vec_ld (16, ref);
643 prev = vec_ld (0, dest);
644 ref += stride;
645 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
646 vec_perm (ref0, ref1, permB)));
648 do {
649 ref0 = vec_ld (0, ref);
650 ref1 = vec_ld (16, ref);
651 ref += stride;
652 prev = vec_ld (stride, dest);
653 vec_st (tmp, 0, dest);
654 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
655 vec_perm (ref0, ref1, permB)));
657 ref0 = vec_ld (0, ref);
658 ref1 = vec_ld (16, ref);
659 ref += stride;
660 prev = vec_ld (2*stride, dest);
661 vec_st (tmp, stride, dest);
662 dest += 2*stride;
663 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
664 vec_perm (ref0, ref1, permB)));
665 } while (--height);
667 ref0 = vec_ld (0, ref);
668 ref1 = vec_ld (16, ref);
669 prev = vec_ld (stride, dest);
670 vec_st (tmp, 0, dest);
671 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
672 vec_perm (ref0, ref1, permB)));
673 vec_st (tmp, stride, dest);
676 static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref,
677 const int stride, int height)
679 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
680 vector_u8_t prev;
682 ones = vec_splat_u8 (1);
683 tmp0 = vec_lvsl (0, ref);
684 tmp0 = vec_mergeh (tmp0, tmp0);
685 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
686 perm0B = vec_add (perm0A, ones);
687 tmp1 = vec_lvsl (stride, ref);
688 tmp1 = vec_mergeh (tmp1, tmp1);
689 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
690 perm1B = vec_add (perm1A, ones);
692 height = (height >> 1) - 1;
694 ref0 = vec_ld (0, ref);
695 ref1 = vec_ld (8, ref);
696 prev = vec_ld (0, dest);
697 ref += stride;
698 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
699 vec_perm (ref0, ref1, perm0B)));
701 do {
702 ref0 = vec_ld (0, ref);
703 ref1 = vec_ld (8, ref);
704 ref += stride;
705 prev = vec_ld (stride, dest);
706 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
707 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
708 dest += stride;
709 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
710 vec_perm (ref0, ref1, perm1B)));
712 ref0 = vec_ld (0, ref);
713 ref1 = vec_ld (8, ref);
714 ref += stride;
715 prev = vec_ld (stride, dest);
716 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
717 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
718 dest += stride;
719 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
720 vec_perm (ref0, ref1, perm0B)));
721 } while (--height);
723 ref0 = vec_ld (0, ref);
724 ref1 = vec_ld (8, ref);
725 prev = vec_ld (stride, dest);
726 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
727 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
728 dest += stride;
729 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
730 vec_perm (ref0, ref1, perm1B)));
731 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
732 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
735 static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref,
736 const int stride, int height)
738 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev;
740 perm = vec_lvsl (0, ref);
742 height = (height >> 1) - 1;
744 ref0 = vec_ld (0, ref);
745 ref1 = vec_ld (15, ref);
746 ref += stride;
747 tmp0 = vec_perm (ref0, ref1, perm);
748 ref0 = vec_ld (0, ref);
749 ref1 = vec_ld (15, ref);
750 ref += stride;
751 prev = vec_ld (0, dest);
752 tmp1 = vec_perm (ref0, ref1, perm);
753 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
755 do {
756 ref0 = vec_ld (0, ref);
757 ref1 = vec_ld (15, ref);
758 ref += stride;
759 prev = vec_ld (stride, dest);
760 vec_st (tmp, 0, dest);
761 tmp0 = vec_perm (ref0, ref1, perm);
762 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
764 ref0 = vec_ld (0, ref);
765 ref1 = vec_ld (15, ref);
766 ref += stride;
767 prev = vec_ld (2*stride, dest);
768 vec_st (tmp, stride, dest);
769 dest += 2*stride;
770 tmp1 = vec_perm (ref0, ref1, perm);
771 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
772 } while (--height);
774 ref0 = vec_ld (0, ref);
775 ref1 = vec_ld (15, ref);
776 prev = vec_ld (stride, dest);
777 vec_st (tmp, 0, dest);
778 tmp0 = vec_perm (ref0, ref1, perm);
779 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
780 vec_st (tmp, stride, dest);
783 static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref,
784 const int stride, int height)
786 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev;
788 tmp0 = vec_lvsl (0, ref);
789 tmp0 = vec_mergeh (tmp0, tmp0);
790 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
791 tmp1 = vec_lvsl (stride, ref);
792 tmp1 = vec_mergeh (tmp1, tmp1);
793 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
795 height = (height >> 1) - 1;
797 ref0 = vec_ld (0, ref);
798 ref1 = vec_ld (7, ref);
799 ref += stride;
800 tmp0 = vec_perm (ref0, ref1, perm0);
801 ref0 = vec_ld (0, ref);
802 ref1 = vec_ld (7, ref);
803 ref += stride;
804 prev = vec_ld (0, dest);
805 tmp1 = vec_perm (ref0, ref1, perm1);
806 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
808 do {
809 ref0 = vec_ld (0, ref);
810 ref1 = vec_ld (7, ref);
811 ref += stride;
812 prev = vec_ld (stride, dest);
813 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
814 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
815 dest += stride;
816 tmp0 = vec_perm (ref0, ref1, perm0);
817 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
819 ref0 = vec_ld (0, ref);
820 ref1 = vec_ld (7, ref);
821 ref += stride;
822 prev = vec_ld (stride, dest);
823 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
824 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
825 dest += stride;
826 tmp1 = vec_perm (ref0, ref1, perm1);
827 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
828 } while (--height);
830 ref0 = vec_ld (0, ref);
831 ref1 = vec_ld (7, ref);
832 prev = vec_ld (stride, dest);
833 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
834 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
835 dest += stride;
836 tmp0 = vec_perm (ref0, ref1, perm0);
837 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
838 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
839 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
842 static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
843 const int stride, int height)
845 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
846 vector_u8_t ones, prev;
848 ones = vec_splat_u8 (1);
849 permA = vec_lvsl (0, ref);
850 permB = vec_add (permA, ones);
852 height = (height >> 1) - 1;
854 ref0 = vec_ld (0, ref);
855 ref1 = vec_ld (16, ref);
856 ref += stride;
857 A = vec_perm (ref0, ref1, permA);
858 B = vec_perm (ref0, ref1, permB);
859 avg0 = vec_avg (A, B);
860 xor0 = vec_xor (A, B);
862 ref0 = vec_ld (0, ref);
863 ref1 = vec_ld (16, ref);
864 ref += stride;
865 prev = vec_ld (0, dest);
866 A = vec_perm (ref0, ref1, permA);
867 B = vec_perm (ref0, ref1, permB);
868 avg1 = vec_avg (A, B);
869 xor1 = vec_xor (A, B);
870 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
871 vec_and (vec_and (ones, vec_or (xor0, xor1)),
872 vec_xor (avg0, avg1))));
874 do {
875 ref0 = vec_ld (0, ref);
876 ref1 = vec_ld (16, ref);
877 ref += stride;
878 prev = vec_ld (stride, dest);
879 vec_st (tmp, 0, dest);
880 A = vec_perm (ref0, ref1, permA);
881 B = vec_perm (ref0, ref1, permB);
882 avg0 = vec_avg (A, B);
883 xor0 = vec_xor (A, B);
884 tmp = vec_avg (prev,
885 vec_sub (vec_avg (avg0, avg1),
886 vec_and (vec_and (ones, vec_or (xor0, xor1)),
887 vec_xor (avg0, avg1))));
889 ref0 = vec_ld (0, ref);
890 ref1 = vec_ld (16, ref);
891 ref += stride;
892 prev = vec_ld (2*stride, dest);
893 vec_st (tmp, stride, dest);
894 dest += 2*stride;
895 A = vec_perm (ref0, ref1, permA);
896 B = vec_perm (ref0, ref1, permB);
897 avg1 = vec_avg (A, B);
898 xor1 = vec_xor (A, B);
899 tmp = vec_avg (prev,
900 vec_sub (vec_avg (avg0, avg1),
901 vec_and (vec_and (ones, vec_or (xor0, xor1)),
902 vec_xor (avg0, avg1))));
903 } while (--height);
905 ref0 = vec_ld (0, ref);
906 ref1 = vec_ld (16, ref);
907 prev = vec_ld (stride, dest);
908 vec_st (tmp, 0, dest);
909 A = vec_perm (ref0, ref1, permA);
910 B = vec_perm (ref0, ref1, permB);
911 avg0 = vec_avg (A, B);
912 xor0 = vec_xor (A, B);
913 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
914 vec_and (vec_and (ones, vec_or (xor0, xor1)),
915 vec_xor (avg0, avg1))));
916 vec_st (tmp, stride, dest);
919 static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
920 const int stride, int height)
922 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
923 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev;
925 ones = vec_splat_u8 (1);
926 perm0A = vec_lvsl (0, ref);
927 perm0A = vec_mergeh (perm0A, perm0A);
928 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
929 perm0B = vec_add (perm0A, ones);
930 perm1A = vec_lvsl (stride, ref);
931 perm1A = vec_mergeh (perm1A, perm1A);
932 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
933 perm1B = vec_add (perm1A, ones);
935 height = (height >> 1) - 1;
937 ref0 = vec_ld (0, ref);
938 ref1 = vec_ld (8, ref);
939 ref += stride;
940 A = vec_perm (ref0, ref1, perm0A);
941 B = vec_perm (ref0, ref1, perm0B);
942 avg0 = vec_avg (A, B);
943 xor0 = vec_xor (A, B);
945 ref0 = vec_ld (0, ref);
946 ref1 = vec_ld (8, ref);
947 ref += stride;
948 prev = vec_ld (0, dest);
949 A = vec_perm (ref0, ref1, perm1A);
950 B = vec_perm (ref0, ref1, perm1B);
951 avg1 = vec_avg (A, B);
952 xor1 = vec_xor (A, B);
953 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
954 vec_and (vec_and (ones, vec_or (xor0, xor1)),
955 vec_xor (avg0, avg1))));
957 do {
958 ref0 = vec_ld (0, ref);
959 ref1 = vec_ld (8, ref);
960 ref += stride;
961 prev = vec_ld (stride, dest);
962 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
963 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
964 dest += stride;
965 A = vec_perm (ref0, ref1, perm0A);
966 B = vec_perm (ref0, ref1, perm0B);
967 avg0 = vec_avg (A, B);
968 xor0 = vec_xor (A, B);
969 tmp = vec_avg (prev,
970 vec_sub (vec_avg (avg0, avg1),
971 vec_and (vec_and (ones, vec_or (xor0, xor1)),
972 vec_xor (avg0, avg1))));
974 ref0 = vec_ld (0, ref);
975 ref1 = vec_ld (8, ref);
976 ref += stride;
977 prev = vec_ld (stride, dest);
978 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
979 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
980 dest += stride;
981 A = vec_perm (ref0, ref1, perm1A);
982 B = vec_perm (ref0, ref1, perm1B);
983 avg1 = vec_avg (A, B);
984 xor1 = vec_xor (A, B);
985 tmp = vec_avg (prev,
986 vec_sub (vec_avg (avg0, avg1),
987 vec_and (vec_and (ones, vec_or (xor0, xor1)),
988 vec_xor (avg0, avg1))));
989 } while (--height);
991 ref0 = vec_ld (0, ref);
992 ref1 = vec_ld (8, ref);
993 prev = vec_ld (stride, dest);
994 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
995 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
996 dest += stride;
997 A = vec_perm (ref0, ref1, perm0A);
998 B = vec_perm (ref0, ref1, perm0B);
999 avg0 = vec_avg (A, B);
1000 xor0 = vec_xor (A, B);
1001 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
1002 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1003 vec_xor (avg0, avg1))));
1004 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1005 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1008 MPEG2_MC_EXTERN (altivec)
1010 #endif