/*
 * motion_comp_altivec.c
 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *
 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 * See http://libmpeg2.sourceforge.net/ for updates.
 *
 * mpeg2dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
24 #include "config.h"
26 #ifdef ARCH_PPC
28 #ifdef HAVE_ALTIVEC_H
29 #include <altivec.h>
30 #endif
31 #include <inttypes.h>
33 #include "mpeg2.h"
34 #include "mpeg2_internal.h"
36 typedef vector signed char vector_s8_t;
37 typedef vector unsigned char vector_u8_t;
38 typedef vector signed short vector_s16_t;
39 typedef vector unsigned short vector_u16_t;
40 typedef vector signed int vector_s32_t;
41 typedef vector unsigned int vector_u32_t;
43 #ifndef COFFEE_BREAK /* Workarounds for gcc suckage */
45 static inline vector_u8_t my_vec_ld (int const A, const uint8_t * const B)
47 return vec_ld (A, (uint8_t *)B);
49 #undef vec_ld
50 #define vec_ld my_vec_ld
52 static inline vector_u8_t my_vec_and (vector_u8_t const A, vector_u8_t const B)
54 return vec_and (A, B);
56 #undef vec_and
57 #define vec_and my_vec_and
59 static inline vector_u8_t my_vec_avg (vector_u8_t const A, vector_u8_t const B)
61 return vec_avg (A, B);
63 #undef vec_avg
64 #define vec_avg my_vec_avg
66 #endif
68 static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref,
69 const int stride, int height)
71 vector_u8_t perm, ref0, ref1, tmp;
73 perm = vec_lvsl (0, ref);
75 height = (height >> 1) - 1;
77 ref0 = vec_ld (0, ref);
78 ref1 = vec_ld (15, ref);
79 ref += stride;
80 tmp = vec_perm (ref0, ref1, perm);
82 do {
83 ref0 = vec_ld (0, ref);
84 ref1 = vec_ld (15, ref);
85 ref += stride;
86 vec_st (tmp, 0, dest);
87 tmp = vec_perm (ref0, ref1, perm);
89 ref0 = vec_ld (0, ref);
90 ref1 = vec_ld (15, ref);
91 ref += stride;
92 vec_st (tmp, stride, dest);
93 dest += 2*stride;
94 tmp = vec_perm (ref0, ref1, perm);
95 } while (--height);
97 ref0 = vec_ld (0, ref);
98 ref1 = vec_ld (15, ref);
99 vec_st (tmp, 0, dest);
100 tmp = vec_perm (ref0, ref1, perm);
101 vec_st (tmp, stride, dest);
104 static void MC_put_o_8_altivec (uint8_t * dest, const uint8_t * ref,
105 const int stride, int height)
107 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1;
109 tmp0 = vec_lvsl (0, ref);
110 tmp0 = vec_mergeh (tmp0, tmp0);
111 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
112 tmp1 = vec_lvsl (stride, ref);
113 tmp1 = vec_mergeh (tmp1, tmp1);
114 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
116 height = (height >> 1) - 1;
118 ref0 = vec_ld (0, ref);
119 ref1 = vec_ld (7, ref);
120 ref += stride;
121 tmp0 = vec_perm (ref0, ref1, perm0);
123 do {
124 ref0 = vec_ld (0, ref);
125 ref1 = vec_ld (7, ref);
126 ref += stride;
127 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
128 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
129 dest += stride;
130 tmp1 = vec_perm (ref0, ref1, perm1);
132 ref0 = vec_ld (0, ref);
133 ref1 = vec_ld (7, ref);
134 ref += stride;
135 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
136 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
137 dest += stride;
138 tmp0 = vec_perm (ref0, ref1, perm0);
139 } while (--height);
141 ref0 = vec_ld (0, ref);
142 ref1 = vec_ld (7, ref);
143 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
144 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
145 dest += stride;
146 tmp1 = vec_perm (ref0, ref1, perm1);
147 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
148 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
151 static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref,
152 const int stride, int height)
154 vector_u8_t permA, permB, ref0, ref1, tmp;
156 permA = vec_lvsl (0, ref);
157 permB = vec_add (permA, vec_splat_u8 (1));
159 height = (height >> 1) - 1;
161 ref0 = vec_ld (0, ref);
162 ref1 = vec_ld (16, ref);
163 ref += stride;
164 tmp = vec_avg (vec_perm (ref0, ref1, permA),
165 vec_perm (ref0, ref1, permB));
167 do {
168 ref0 = vec_ld (0, ref);
169 ref1 = vec_ld (16, ref);
170 ref += stride;
171 vec_st (tmp, 0, dest);
172 tmp = vec_avg (vec_perm (ref0, ref1, permA),
173 vec_perm (ref0, ref1, permB));
175 ref0 = vec_ld (0, ref);
176 ref1 = vec_ld (16, ref);
177 ref += stride;
178 vec_st (tmp, stride, dest);
179 dest += 2*stride;
180 tmp = vec_avg (vec_perm (ref0, ref1, permA),
181 vec_perm (ref0, ref1, permB));
182 } while (--height);
184 ref0 = vec_ld (0, ref);
185 ref1 = vec_ld (16, ref);
186 vec_st (tmp, 0, dest);
187 tmp = vec_avg (vec_perm (ref0, ref1, permA),
188 vec_perm (ref0, ref1, permB));
189 vec_st (tmp, stride, dest);
192 static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref,
193 const int stride, int height)
195 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
197 ones = vec_splat_u8 (1);
198 tmp0 = vec_lvsl (0, ref);
199 tmp0 = vec_mergeh (tmp0, tmp0);
200 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
201 perm0B = vec_add (perm0A, ones);
202 tmp1 = vec_lvsl (stride, ref);
203 tmp1 = vec_mergeh (tmp1, tmp1);
204 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
205 perm1B = vec_add (perm1A, ones);
207 height = (height >> 1) - 1;
209 ref0 = vec_ld (0, ref);
210 ref1 = vec_ld (8, ref);
211 ref += stride;
212 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
213 vec_perm (ref0, ref1, perm0B));
215 do {
216 ref0 = vec_ld (0, ref);
217 ref1 = vec_ld (8, ref);
218 ref += stride;
219 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
220 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
221 dest += stride;
222 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
223 vec_perm (ref0, ref1, perm1B));
225 ref0 = vec_ld (0, ref);
226 ref1 = vec_ld (8, ref);
227 ref += stride;
228 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
229 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
230 dest += stride;
231 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
232 vec_perm (ref0, ref1, perm0B));
233 } while (--height);
235 ref0 = vec_ld (0, ref);
236 ref1 = vec_ld (8, ref);
237 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
238 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
239 dest += stride;
240 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
241 vec_perm (ref0, ref1, perm1B));
242 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
243 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
246 static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref,
247 const int stride, int height)
249 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp;
251 perm = vec_lvsl (0, ref);
253 height = (height >> 1) - 1;
255 ref0 = vec_ld (0, ref);
256 ref1 = vec_ld (15, ref);
257 ref += stride;
258 tmp0 = vec_perm (ref0, ref1, perm);
259 ref0 = vec_ld (0, ref);
260 ref1 = vec_ld (15, ref);
261 ref += stride;
262 tmp1 = vec_perm (ref0, ref1, perm);
263 tmp = vec_avg (tmp0, tmp1);
265 do {
266 ref0 = vec_ld (0, ref);
267 ref1 = vec_ld (15, ref);
268 ref += stride;
269 vec_st (tmp, 0, dest);
270 tmp0 = vec_perm (ref0, ref1, perm);
271 tmp = vec_avg (tmp0, tmp1);
273 ref0 = vec_ld (0, ref);
274 ref1 = vec_ld (15, ref);
275 ref += stride;
276 vec_st (tmp, stride, dest);
277 dest += 2*stride;
278 tmp1 = vec_perm (ref0, ref1, perm);
279 tmp = vec_avg (tmp0, tmp1);
280 } while (--height);
282 ref0 = vec_ld (0, ref);
283 ref1 = vec_ld (15, ref);
284 vec_st (tmp, 0, dest);
285 tmp0 = vec_perm (ref0, ref1, perm);
286 tmp = vec_avg (tmp0, tmp1);
287 vec_st (tmp, stride, dest);
290 static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref,
291 const int stride, int height)
293 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1;
295 tmp0 = vec_lvsl (0, ref);
296 tmp0 = vec_mergeh (tmp0, tmp0);
297 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
298 tmp1 = vec_lvsl (stride, ref);
299 tmp1 = vec_mergeh (tmp1, tmp1);
300 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
302 height = (height >> 1) - 1;
304 ref0 = vec_ld (0, ref);
305 ref1 = vec_ld (7, ref);
306 ref += stride;
307 tmp0 = vec_perm (ref0, ref1, perm0);
308 ref0 = vec_ld (0, ref);
309 ref1 = vec_ld (7, ref);
310 ref += stride;
311 tmp1 = vec_perm (ref0, ref1, perm1);
312 tmp = vec_avg (tmp0, tmp1);
314 do {
315 ref0 = vec_ld (0, ref);
316 ref1 = vec_ld (7, ref);
317 ref += stride;
318 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
319 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
320 dest += stride;
321 tmp0 = vec_perm (ref0, ref1, perm0);
322 tmp = vec_avg (tmp0, tmp1);
324 ref0 = vec_ld (0, ref);
325 ref1 = vec_ld (7, ref);
326 ref += stride;
327 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
328 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
329 dest += stride;
330 tmp1 = vec_perm (ref0, ref1, perm1);
331 tmp = vec_avg (tmp0, tmp1);
332 } while (--height);
334 ref0 = vec_ld (0, ref);
335 ref1 = vec_ld (7, ref);
336 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
337 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
338 dest += stride;
339 tmp0 = vec_perm (ref0, ref1, perm0);
340 tmp = vec_avg (tmp0, tmp1);
341 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
342 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
345 static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
346 const int stride, int height)
348 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
349 vector_u8_t ones;
351 ones = vec_splat_u8 (1);
352 permA = vec_lvsl (0, ref);
353 permB = vec_add (permA, ones);
355 height = (height >> 1) - 1;
357 ref0 = vec_ld (0, ref);
358 ref1 = vec_ld (16, ref);
359 ref += stride;
360 A = vec_perm (ref0, ref1, permA);
361 B = vec_perm (ref0, ref1, permB);
362 avg0 = vec_avg (A, B);
363 xor0 = vec_xor (A, B);
365 ref0 = vec_ld (0, ref);
366 ref1 = vec_ld (16, ref);
367 ref += stride;
368 A = vec_perm (ref0, ref1, permA);
369 B = vec_perm (ref0, ref1, permB);
370 avg1 = vec_avg (A, B);
371 xor1 = vec_xor (A, B);
372 tmp = vec_sub (vec_avg (avg0, avg1),
373 vec_and (vec_and (ones, vec_or (xor0, xor1)),
374 vec_xor (avg0, avg1)));
376 do {
377 ref0 = vec_ld (0, ref);
378 ref1 = vec_ld (16, ref);
379 ref += stride;
380 vec_st (tmp, 0, dest);
381 A = vec_perm (ref0, ref1, permA);
382 B = vec_perm (ref0, ref1, permB);
383 avg0 = vec_avg (A, B);
384 xor0 = vec_xor (A, B);
385 tmp = vec_sub (vec_avg (avg0, avg1),
386 vec_and (vec_and (ones, vec_or (xor0, xor1)),
387 vec_xor (avg0, avg1)));
389 ref0 = vec_ld (0, ref);
390 ref1 = vec_ld (16, ref);
391 ref += stride;
392 vec_st (tmp, stride, dest);
393 dest += 2*stride;
394 A = vec_perm (ref0, ref1, permA);
395 B = vec_perm (ref0, ref1, permB);
396 avg1 = vec_avg (A, B);
397 xor1 = vec_xor (A, B);
398 tmp = vec_sub (vec_avg (avg0, avg1),
399 vec_and (vec_and (ones, vec_or (xor0, xor1)),
400 vec_xor (avg0, avg1)));
401 } while (--height);
403 ref0 = vec_ld (0, ref);
404 ref1 = vec_ld (16, ref);
405 vec_st (tmp, 0, dest);
406 A = vec_perm (ref0, ref1, permA);
407 B = vec_perm (ref0, ref1, permB);
408 avg0 = vec_avg (A, B);
409 xor0 = vec_xor (A, B);
410 tmp = vec_sub (vec_avg (avg0, avg1),
411 vec_and (vec_and (ones, vec_or (xor0, xor1)),
412 vec_xor (avg0, avg1)));
413 vec_st (tmp, stride, dest);
416 static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
417 const int stride, int height)
419 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
420 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones;
422 ones = vec_splat_u8 (1);
423 perm0A = vec_lvsl (0, ref);
424 perm0A = vec_mergeh (perm0A, perm0A);
425 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
426 perm0B = vec_add (perm0A, ones);
427 perm1A = vec_lvsl (stride, ref);
428 perm1A = vec_mergeh (perm1A, perm1A);
429 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
430 perm1B = vec_add (perm1A, ones);
432 height = (height >> 1) - 1;
434 ref0 = vec_ld (0, ref);
435 ref1 = vec_ld (8, ref);
436 ref += stride;
437 A = vec_perm (ref0, ref1, perm0A);
438 B = vec_perm (ref0, ref1, perm0B);
439 avg0 = vec_avg (A, B);
440 xor0 = vec_xor (A, B);
442 ref0 = vec_ld (0, ref);
443 ref1 = vec_ld (8, ref);
444 ref += stride;
445 A = vec_perm (ref0, ref1, perm1A);
446 B = vec_perm (ref0, ref1, perm1B);
447 avg1 = vec_avg (A, B);
448 xor1 = vec_xor (A, B);
449 tmp = vec_sub (vec_avg (avg0, avg1),
450 vec_and (vec_and (ones, vec_or (xor0, xor1)),
451 vec_xor (avg0, avg1)));
453 do {
454 ref0 = vec_ld (0, ref);
455 ref1 = vec_ld (8, ref);
456 ref += stride;
457 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
458 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
459 dest += stride;
460 A = vec_perm (ref0, ref1, perm0A);
461 B = vec_perm (ref0, ref1, perm0B);
462 avg0 = vec_avg (A, B);
463 xor0 = vec_xor (A, B);
464 tmp = vec_sub (vec_avg (avg0, avg1),
465 vec_and (vec_and (ones, vec_or (xor0, xor1)),
466 vec_xor (avg0, avg1)));
468 ref0 = vec_ld (0, ref);
469 ref1 = vec_ld (8, ref);
470 ref += stride;
471 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
472 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
473 dest += stride;
474 A = vec_perm (ref0, ref1, perm1A);
475 B = vec_perm (ref0, ref1, perm1B);
476 avg1 = vec_avg (A, B);
477 xor1 = vec_xor (A, B);
478 tmp = vec_sub (vec_avg (avg0, avg1),
479 vec_and (vec_and (ones, vec_or (xor0, xor1)),
480 vec_xor (avg0, avg1)));
481 } while (--height);
483 ref0 = vec_ld (0, ref);
484 ref1 = vec_ld (8, ref);
485 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
486 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
487 dest += stride;
488 A = vec_perm (ref0, ref1, perm0A);
489 B = vec_perm (ref0, ref1, perm0B);
490 avg0 = vec_avg (A, B);
491 xor0 = vec_xor (A, B);
492 tmp = vec_sub (vec_avg (avg0, avg1),
493 vec_and (vec_and (ones, vec_or (xor0, xor1)),
494 vec_xor (avg0, avg1)));
495 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
496 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
#if 0
/* Disabled alternate xy implementation using widened 16-bit arithmetic
 * ((a+b+c+d+2)>>2 computed exactly); kept for reference only. */
static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
				 const int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones;
    vector_u16_t splat2, temp;

    ones = vec_splat_u8 (1);
    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, ones);

    zero = vec_splat_u8 (0);
    splat2 = vec_splat_u16 (2);

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	A = vec_perm (ref0, ref1, permA);
	B = vec_perm (ref0, ref1, permB);
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	C = vec_perm (ref0, ref1, permA);
	D = vec_perm (ref0, ref1, permB);

	temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A),
				 (vector_u16_t)vec_mergeh (zero, B)),
			vec_add ((vector_u16_t)vec_mergeh (zero, C),
				 (vector_u16_t)vec_mergeh (zero, D)));
	temp = vec_sr (vec_add (temp, splat2), splat2);
	tmp = vec_pack (temp, temp);

	vec_st (tmp, 0, dest);
	dest += stride;
	tmp = vec_avg (vec_perm (ref0, ref1, permA),
		       vec_perm (ref0, ref1, permB));
    } while (--height);
}
#endif
539 static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref,
540 const int stride, int height)
542 vector_u8_t perm, ref0, ref1, tmp, prev;
544 perm = vec_lvsl (0, ref);
546 height = (height >> 1) - 1;
548 ref0 = vec_ld (0, ref);
549 ref1 = vec_ld (15, ref);
550 ref += stride;
551 prev = vec_ld (0, dest);
552 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
554 do {
555 ref0 = vec_ld (0, ref);
556 ref1 = vec_ld (15, ref);
557 ref += stride;
558 prev = vec_ld (stride, dest);
559 vec_st (tmp, 0, dest);
560 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
562 ref0 = vec_ld (0, ref);
563 ref1 = vec_ld (15, ref);
564 ref += stride;
565 prev = vec_ld (2*stride, dest);
566 vec_st (tmp, stride, dest);
567 dest += 2*stride;
568 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
569 } while (--height);
571 ref0 = vec_ld (0, ref);
572 ref1 = vec_ld (15, ref);
573 prev = vec_ld (stride, dest);
574 vec_st (tmp, 0, dest);
575 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
576 vec_st (tmp, stride, dest);
579 static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref,
580 const int stride, int height)
582 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev;
584 tmp0 = vec_lvsl (0, ref);
585 tmp0 = vec_mergeh (tmp0, tmp0);
586 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
587 tmp1 = vec_lvsl (stride, ref);
588 tmp1 = vec_mergeh (tmp1, tmp1);
589 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
591 height = (height >> 1) - 1;
593 ref0 = vec_ld (0, ref);
594 ref1 = vec_ld (7, ref);
595 ref += stride;
596 prev = vec_ld (0, dest);
597 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
599 do {
600 ref0 = vec_ld (0, ref);
601 ref1 = vec_ld (7, ref);
602 ref += stride;
603 prev = vec_ld (stride, dest);
604 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
605 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
606 dest += stride;
607 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
609 ref0 = vec_ld (0, ref);
610 ref1 = vec_ld (7, ref);
611 ref += stride;
612 prev = vec_ld (stride, dest);
613 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
614 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
615 dest += stride;
616 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
617 } while (--height);
619 ref0 = vec_ld (0, ref);
620 ref1 = vec_ld (7, ref);
621 prev = vec_ld (stride, dest);
622 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
623 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
624 dest += stride;
625 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
626 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
627 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
630 static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref,
631 const int stride, int height)
633 vector_u8_t permA, permB, ref0, ref1, tmp, prev;
635 permA = vec_lvsl (0, ref);
636 permB = vec_add (permA, vec_splat_u8 (1));
638 height = (height >> 1) - 1;
640 ref0 = vec_ld (0, ref);
641 ref1 = vec_ld (16, ref);
642 prev = vec_ld (0, dest);
643 ref += stride;
644 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
645 vec_perm (ref0, ref1, permB)));
647 do {
648 ref0 = vec_ld (0, ref);
649 ref1 = vec_ld (16, ref);
650 ref += stride;
651 prev = vec_ld (stride, dest);
652 vec_st (tmp, 0, dest);
653 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
654 vec_perm (ref0, ref1, permB)));
656 ref0 = vec_ld (0, ref);
657 ref1 = vec_ld (16, ref);
658 ref += stride;
659 prev = vec_ld (2*stride, dest);
660 vec_st (tmp, stride, dest);
661 dest += 2*stride;
662 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
663 vec_perm (ref0, ref1, permB)));
664 } while (--height);
666 ref0 = vec_ld (0, ref);
667 ref1 = vec_ld (16, ref);
668 prev = vec_ld (stride, dest);
669 vec_st (tmp, 0, dest);
670 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
671 vec_perm (ref0, ref1, permB)));
672 vec_st (tmp, stride, dest);
675 static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref,
676 const int stride, int height)
678 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
679 vector_u8_t prev;
681 ones = vec_splat_u8 (1);
682 tmp0 = vec_lvsl (0, ref);
683 tmp0 = vec_mergeh (tmp0, tmp0);
684 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
685 perm0B = vec_add (perm0A, ones);
686 tmp1 = vec_lvsl (stride, ref);
687 tmp1 = vec_mergeh (tmp1, tmp1);
688 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
689 perm1B = vec_add (perm1A, ones);
691 height = (height >> 1) - 1;
693 ref0 = vec_ld (0, ref);
694 ref1 = vec_ld (8, ref);
695 prev = vec_ld (0, dest);
696 ref += stride;
697 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
698 vec_perm (ref0, ref1, perm0B)));
700 do {
701 ref0 = vec_ld (0, ref);
702 ref1 = vec_ld (8, ref);
703 ref += stride;
704 prev = vec_ld (stride, dest);
705 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
706 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
707 dest += stride;
708 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
709 vec_perm (ref0, ref1, perm1B)));
711 ref0 = vec_ld (0, ref);
712 ref1 = vec_ld (8, ref);
713 ref += stride;
714 prev = vec_ld (stride, dest);
715 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
716 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
717 dest += stride;
718 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
719 vec_perm (ref0, ref1, perm0B)));
720 } while (--height);
722 ref0 = vec_ld (0, ref);
723 ref1 = vec_ld (8, ref);
724 prev = vec_ld (stride, dest);
725 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
726 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
727 dest += stride;
728 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
729 vec_perm (ref0, ref1, perm1B)));
730 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
731 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
734 static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref,
735 const int stride, int height)
737 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev;
739 perm = vec_lvsl (0, ref);
741 height = (height >> 1) - 1;
743 ref0 = vec_ld (0, ref);
744 ref1 = vec_ld (15, ref);
745 ref += stride;
746 tmp0 = vec_perm (ref0, ref1, perm);
747 ref0 = vec_ld (0, ref);
748 ref1 = vec_ld (15, ref);
749 ref += stride;
750 prev = vec_ld (0, dest);
751 tmp1 = vec_perm (ref0, ref1, perm);
752 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
754 do {
755 ref0 = vec_ld (0, ref);
756 ref1 = vec_ld (15, ref);
757 ref += stride;
758 prev = vec_ld (stride, dest);
759 vec_st (tmp, 0, dest);
760 tmp0 = vec_perm (ref0, ref1, perm);
761 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
763 ref0 = vec_ld (0, ref);
764 ref1 = vec_ld (15, ref);
765 ref += stride;
766 prev = vec_ld (2*stride, dest);
767 vec_st (tmp, stride, dest);
768 dest += 2*stride;
769 tmp1 = vec_perm (ref0, ref1, perm);
770 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
771 } while (--height);
773 ref0 = vec_ld (0, ref);
774 ref1 = vec_ld (15, ref);
775 prev = vec_ld (stride, dest);
776 vec_st (tmp, 0, dest);
777 tmp0 = vec_perm (ref0, ref1, perm);
778 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
779 vec_st (tmp, stride, dest);
782 static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref,
783 const int stride, int height)
785 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev;
787 tmp0 = vec_lvsl (0, ref);
788 tmp0 = vec_mergeh (tmp0, tmp0);
789 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
790 tmp1 = vec_lvsl (stride, ref);
791 tmp1 = vec_mergeh (tmp1, tmp1);
792 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
794 height = (height >> 1) - 1;
796 ref0 = vec_ld (0, ref);
797 ref1 = vec_ld (7, ref);
798 ref += stride;
799 tmp0 = vec_perm (ref0, ref1, perm0);
800 ref0 = vec_ld (0, ref);
801 ref1 = vec_ld (7, ref);
802 ref += stride;
803 prev = vec_ld (0, dest);
804 tmp1 = vec_perm (ref0, ref1, perm1);
805 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
807 do {
808 ref0 = vec_ld (0, ref);
809 ref1 = vec_ld (7, ref);
810 ref += stride;
811 prev = vec_ld (stride, dest);
812 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
813 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
814 dest += stride;
815 tmp0 = vec_perm (ref0, ref1, perm0);
816 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
818 ref0 = vec_ld (0, ref);
819 ref1 = vec_ld (7, ref);
820 ref += stride;
821 prev = vec_ld (stride, dest);
822 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
823 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
824 dest += stride;
825 tmp1 = vec_perm (ref0, ref1, perm1);
826 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
827 } while (--height);
829 ref0 = vec_ld (0, ref);
830 ref1 = vec_ld (7, ref);
831 prev = vec_ld (stride, dest);
832 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
833 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
834 dest += stride;
835 tmp0 = vec_perm (ref0, ref1, perm0);
836 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
837 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
838 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
841 static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
842 const int stride, int height)
844 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
845 vector_u8_t ones, prev;
847 ones = vec_splat_u8 (1);
848 permA = vec_lvsl (0, ref);
849 permB = vec_add (permA, ones);
851 height = (height >> 1) - 1;
853 ref0 = vec_ld (0, ref);
854 ref1 = vec_ld (16, ref);
855 ref += stride;
856 A = vec_perm (ref0, ref1, permA);
857 B = vec_perm (ref0, ref1, permB);
858 avg0 = vec_avg (A, B);
859 xor0 = vec_xor (A, B);
861 ref0 = vec_ld (0, ref);
862 ref1 = vec_ld (16, ref);
863 ref += stride;
864 prev = vec_ld (0, dest);
865 A = vec_perm (ref0, ref1, permA);
866 B = vec_perm (ref0, ref1, permB);
867 avg1 = vec_avg (A, B);
868 xor1 = vec_xor (A, B);
869 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
870 vec_and (vec_and (ones, vec_or (xor0, xor1)),
871 vec_xor (avg0, avg1))));
873 do {
874 ref0 = vec_ld (0, ref);
875 ref1 = vec_ld (16, ref);
876 ref += stride;
877 prev = vec_ld (stride, dest);
878 vec_st (tmp, 0, dest);
879 A = vec_perm (ref0, ref1, permA);
880 B = vec_perm (ref0, ref1, permB);
881 avg0 = vec_avg (A, B);
882 xor0 = vec_xor (A, B);
883 tmp = vec_avg (prev,
884 vec_sub (vec_avg (avg0, avg1),
885 vec_and (vec_and (ones, vec_or (xor0, xor1)),
886 vec_xor (avg0, avg1))));
888 ref0 = vec_ld (0, ref);
889 ref1 = vec_ld (16, ref);
890 ref += stride;
891 prev = vec_ld (2*stride, dest);
892 vec_st (tmp, stride, dest);
893 dest += 2*stride;
894 A = vec_perm (ref0, ref1, permA);
895 B = vec_perm (ref0, ref1, permB);
896 avg1 = vec_avg (A, B);
897 xor1 = vec_xor (A, B);
898 tmp = vec_avg (prev,
899 vec_sub (vec_avg (avg0, avg1),
900 vec_and (vec_and (ones, vec_or (xor0, xor1)),
901 vec_xor (avg0, avg1))));
902 } while (--height);
904 ref0 = vec_ld (0, ref);
905 ref1 = vec_ld (16, ref);
906 prev = vec_ld (stride, dest);
907 vec_st (tmp, 0, dest);
908 A = vec_perm (ref0, ref1, permA);
909 B = vec_perm (ref0, ref1, permB);
910 avg0 = vec_avg (A, B);
911 xor0 = vec_xor (A, B);
912 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
913 vec_and (vec_and (ones, vec_or (xor0, xor1)),
914 vec_xor (avg0, avg1))));
915 vec_st (tmp, stride, dest);
918 static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
919 const int stride, int height)
921 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
922 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev;
924 ones = vec_splat_u8 (1);
925 perm0A = vec_lvsl (0, ref);
926 perm0A = vec_mergeh (perm0A, perm0A);
927 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
928 perm0B = vec_add (perm0A, ones);
929 perm1A = vec_lvsl (stride, ref);
930 perm1A = vec_mergeh (perm1A, perm1A);
931 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
932 perm1B = vec_add (perm1A, ones);
934 height = (height >> 1) - 1;
936 ref0 = vec_ld (0, ref);
937 ref1 = vec_ld (8, ref);
938 ref += stride;
939 A = vec_perm (ref0, ref1, perm0A);
940 B = vec_perm (ref0, ref1, perm0B);
941 avg0 = vec_avg (A, B);
942 xor0 = vec_xor (A, B);
944 ref0 = vec_ld (0, ref);
945 ref1 = vec_ld (8, ref);
946 ref += stride;
947 prev = vec_ld (0, dest);
948 A = vec_perm (ref0, ref1, perm1A);
949 B = vec_perm (ref0, ref1, perm1B);
950 avg1 = vec_avg (A, B);
951 xor1 = vec_xor (A, B);
952 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
953 vec_and (vec_and (ones, vec_or (xor0, xor1)),
954 vec_xor (avg0, avg1))));
956 do {
957 ref0 = vec_ld (0, ref);
958 ref1 = vec_ld (8, ref);
959 ref += stride;
960 prev = vec_ld (stride, dest);
961 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
962 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
963 dest += stride;
964 A = vec_perm (ref0, ref1, perm0A);
965 B = vec_perm (ref0, ref1, perm0B);
966 avg0 = vec_avg (A, B);
967 xor0 = vec_xor (A, B);
968 tmp = vec_avg (prev,
969 vec_sub (vec_avg (avg0, avg1),
970 vec_and (vec_and (ones, vec_or (xor0, xor1)),
971 vec_xor (avg0, avg1))));
973 ref0 = vec_ld (0, ref);
974 ref1 = vec_ld (8, ref);
975 ref += stride;
976 prev = vec_ld (stride, dest);
977 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
978 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
979 dest += stride;
980 A = vec_perm (ref0, ref1, perm1A);
981 B = vec_perm (ref0, ref1, perm1B);
982 avg1 = vec_avg (A, B);
983 xor1 = vec_xor (A, B);
984 tmp = vec_avg (prev,
985 vec_sub (vec_avg (avg0, avg1),
986 vec_and (vec_and (ones, vec_or (xor0, xor1)),
987 vec_xor (avg0, avg1))));
988 } while (--height);
990 ref0 = vec_ld (0, ref);
991 ref1 = vec_ld (8, ref);
992 prev = vec_ld (stride, dest);
993 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
994 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
995 dest += stride;
996 A = vec_perm (ref0, ref1, perm0A);
997 B = vec_perm (ref0, ref1, perm0B);
998 avg0 = vec_avg (A, B);
999 xor0 = vec_xor (A, B);
1000 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
1001 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1002 vec_xor (avg0, avg1))));
1003 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1004 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1007 MPEG2_MC_EXTERN (altivec)
1009 #endif