1 Index: /common/ppc/quant.c
2 ===================================================================
3 --- /common/ppc/quant.c (revision 601)
4 +++ /common/ppc/quant.c (revision 621)
6 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
7 *****************************************************************************/
13 #include "common/common.h"
15 temp2v = vec_xor(temp2v, mskB); \
16 temp1v = vec_adds(temp1v, vec_and(mskA, one)); \
17 -vec_st(temp1v, (dct0), dct); \
18 +vec_st(temp1v, (dct0), (int16_t*)dct); \
19 temp2v = vec_adds(temp2v, vec_and(mskB, one)); \
20 -vec_st(temp2v, (dct1), dct);
21 +vec_st(temp2v, (dct1), (int16_t*)dct);
23 void x264_quant_4x4_altivec( int16_t dct[4][4], int quant_mf[4][4], int const i_qbits, int const f ) {
24 vector bool short mskA;
28 vec_u32_t multEvenvA, multOddvA;
35 vector bool short mskB;
37 vec_u32_t multEvenvB, multOddvB;
41 vec_s16_t temp1v, temp2v;
43 - vect_sint_u qbits_u;
46 i_qbitsv = vec_splat(qbits_u.v, 0);
53 temp2v = vec_xor(temp2v, mskB); \
54 temp1v = vec_add(temp1v, vec_and(mskA, one)); \
55 -vec_st(temp1v, (dct0), dct); \
56 +vec_st(temp1v, (dct0), (int16_t*)dct); \
57 temp2v = vec_add(temp2v, vec_and(mskB, one)); \
58 -vec_st(temp2v, (dct1), dct);
59 +vec_st(temp2v, (dct1), (int16_t*)dct);
62 void x264_quant_4x4_dc_altivec( int16_t dct[4][4], int i_quant_mf, int const i_qbits, int const f ) {
63 vector bool short mskA;
67 vec_u32_t multEvenvA, multOddvA;
72 vector bool short mskB;
74 vec_s16_t temp1v, temp2v;
81 mfv = vec_splat( mf_u.v, 0 );
82 - mfv = vec_packs( mfv, mfv);
84 - vect_sint_u qbits_u;
87 i_qbitsv = vec_splat(qbits_u.v, 0);
92 fV = vec_splat(f_u.v, 0);
94 void x264_quant_8x8_altivec( int16_t dct[8][8], int quant_mf[8][8], int const i_qbits, int const f ) {
95 vector bool short mskA;
99 - vec_s32_t multEvenvA, multOddvA, mfvA;
100 + vec_u32_t multEvenvA, multOddvA;
102 vec_s16_t zerov, one;
106 vector bool short mskB;
108 - vec_u32_t multEvenvB, multOddvB, mfvB;
109 + vec_u32_t multEvenvB, multOddvB;
112 vec_s16_t temp1v, temp2v;
114 i_qbitsv = vec_splat(qbits_u.v, 0);
119 fV = vec_splat(f_u.v, 0);
120 Index: /common/ppc/dct.c
121 ===================================================================
122 --- /common/ppc/dct.c (revision 604)
123 +++ /common/ppc/dct.c (revision 621)
125 VEC_DCT( dct0v, dct1v, dct2v, dct3v, tmp0v, tmp1v, tmp2v, tmp3v );
127 - vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0, dct);
128 - vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16, dct);
129 + vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0, (int16_t*)dct);
130 + vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16, (int16_t*)dct);
134 VEC_DCT( dct4v, dct5v, dct6v, dct7v, tmp4v, tmp5v, tmp6v, tmp7v );
136 - vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0, dct);
137 - vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16, dct);
138 - vec_st(vec_perm(tmp4v, tmp5v, permHighv), 32, dct);
139 - vec_st(vec_perm(tmp6v, tmp7v, permHighv), 48, dct);
140 - vec_st(vec_perm(tmp0v, tmp1v, permLowv), 64, dct);
141 - vec_st(vec_perm(tmp2v, tmp3v, permLowv), 80, dct);
142 - vec_st(vec_perm(tmp4v, tmp5v, permLowv), 96, dct);
143 - vec_st(vec_perm(tmp6v, tmp7v, permLowv), 112, dct);
144 + vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0, (int16_t*)dct);
145 + vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16, (int16_t*)dct);
146 + vec_st(vec_perm(tmp4v, tmp5v, permHighv), 32, (int16_t*)dct);
147 + vec_st(vec_perm(tmp6v, tmp7v, permHighv), 48, (int16_t*)dct);
148 + vec_st(vec_perm(tmp0v, tmp1v, permLowv), 64, (int16_t*)dct);
149 + vec_st(vec_perm(tmp2v, tmp3v, permLowv), 80, (int16_t*)dct);
150 + vec_st(vec_perm(tmp4v, tmp5v, permLowv), 96, (int16_t*)dct);
151 + vec_st(vec_perm(tmp6v, tmp7v, permLowv), 112, (int16_t*)dct);
155 void x264_add8x8_idct8_altivec( uint8_t *dst, int16_t dct[8][8] )
157 - vec_s16_t onev = vec_splat_s16(1);
158 - vec_s16_t twov = vec_splat_s16(2);
159 + vec_u16_t onev = vec_splat_s16(1);
160 + vec_u16_t twov = vec_splat_s16(2);
162 dct[0][0] += 32; // rounding for the >>6 at the end
164 vec_u8_t perm_ldv = vec_lvsl(0, dst);
165 vec_u8_t perm_stv = vec_lvsr(8, dst);
166 - vec_s16_t sixv = vec_splat_s16(6);
167 + vec_u16_t sixv = vec_splat_s16(6);
168 const vec_u8_t sel = (vec_u8_t) CV(0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1);
170 Index: /common/ppc/quant.h
171 ===================================================================
172 --- /common/ppc/quant.h (revision 601)
173 +++ /common/ppc/quant.h (revision 621)
175 *****************************************************************************/
178 +#include <altivec.h>
182 #define _PPC_QUANT_H 1
187 - vector signed int v;
190 + unsigned short s[8];
191 + vector unsigned short v;
194 void x264_quant_4x4_altivec( int16_t dct[4][4], int quant_mf[4][4], int const i_qbits, int const f );