PR middle-end/77357 - strlen of constant strings not folded
[official-gcc.git] / gcc / testsuite / gcc.dg / vmx / dct.c
blob 00c4cd93c7b3f0655970e779efd2692074e16f1b
1 /* { dg-do compile } */
2 #include <altivec.h>
/* Transpose an 8x8 block of signed shorts held in eight vectors.
   Three rounds of merge-high/merge-low interleaves accomplish the
   transpose: each round doubles the stride of interleaved elements.
   INPUT and OUTPUT each point to eight vectors (8x8 int16 block);
   OUTPUT receives the transposed block.  */
inline void
transpose_vmx (vector signed short *input, vector signed short *output)
{
  vector signed short v0, v1, v2, v3, v4, v5, v6, v7;
  vector signed short x0, x1, x2, x3, x4, x5, x6, x7;

  /* Round 1: interleave rows 4 apart.  */
  v0 = vec_mergeh (input[0], input[4]);
  v1 = vec_mergel (input[0], input[4]);
  v2 = vec_mergeh (input[1], input[5]);
  v3 = vec_mergel (input[1], input[5]);
  v4 = vec_mergeh (input[2], input[6]);
  v5 = vec_mergel (input[2], input[6]);
  v6 = vec_mergeh (input[3], input[7]);
  v7 = vec_mergel (input[3], input[7]);

  /* Round 2: interleave the intermediates 4 apart again.  */
  x0 = vec_mergeh (v0, v4);
  x1 = vec_mergel (v0, v4);
  x2 = vec_mergeh (v1, v5);
  x3 = vec_mergel (v1, v5);
  x4 = vec_mergeh (v2, v6);
  x5 = vec_mergel (v2, v6);
  x6 = vec_mergeh (v3, v7);
  x7 = vec_mergel (v3, v7);

  /* Round 3: final interleave produces the transposed rows.  */
  output[0] = vec_mergeh (x0, x4);
  output[1] = vec_mergel (x0, x4);
  output[2] = vec_mergeh (x1, x5);
  output[3] = vec_mergel (x1, x5);
  output[4] = vec_mergeh (x2, x6);
  output[5] = vec_mergel (x2, x6);
  output[6] = vec_mergeh (x3, x7);
  output[7] = vec_mergel (x3, x7);
}
/* Two-dimensional 8x8 DCT using AltiVec saturating arithmetic.

   INPUT points to nine vectors: eight rows of input samples followed
   by one vector of multiplication constants (splatted per element).
   OUTPUT receives the eight post-scaled result rows.
   POSTSCALE points to eight vectors of per-row scaling factors.

   The column pass writes into IN[], the block is transposed via
   transpose_vmx, and the row pass then operates on OUT[] before
   post-scaling into OUTPUT.  */
void
dct_vmx (vector signed short *input, vector signed short *output,
	 vector signed short *postscale)
{
  vector signed short mul0, mul1, mul2, mul3, mul4, mul5, mul6, mul;
  vector signed short v0, v1, v2, v3, v4, v5, v6, v7, v8, v9;
  vector signed short v20, v21, v22, v23, v24, v25, v26, v27, v31;
  int i;
  vector signed short in[8], out[8];

  /* Load first eight rows of input data */

  /* Load multiplication constants */

  /* Splat multiplication constants; input[8] holds one constant per
     element (c4, a2, a0, a1, -c4, -a2, 0, ...).  */
  mul0 = vec_splat(input[8],0);
  mul1 = vec_splat(input[8],1);
  mul2 = vec_splat(input[8],2);
  mul3 = vec_splat(input[8],3);
  mul4 = vec_splat(input[8],4);
  mul5 = vec_splat(input[8],5);
  mul6 = vec_splat(input[8],6);

  /* Perform DCT on the eight columns */

  /*********** Stage 1 ***********/

  v8 = vec_adds (input[0], input[7]);
  v9 = vec_subs (input[0], input[7]);
  v0 = vec_adds (input[1], input[6]);
  v7 = vec_subs (input[1], input[6]);
  v1 = vec_adds (input[2], input[5]);
  v6 = vec_subs (input[2], input[5]);
  v2 = vec_adds (input[3], input[4]);
  v5 = vec_subs (input[3], input[4]);

  /*********** Stage 2 ***********/

  /* Top */
  v3 = vec_adds (v8, v2);		/* (V0+V7) + (V3+V4) */
  v4 = vec_subs (v8, v2);		/* (V0+V7) - (V3+V4) */
  v2 = vec_adds (v0, v1);		/* (V1+V6) + (V2+V5) */
  v8 = vec_subs (v0, v1);		/* (V1+V6) - (V2+V5) */

  /* Bottom */
  v0 = vec_subs (v7, v6);		/* (V1-V6) - (V2-V5) */
  v1 = vec_adds (v7, v6);		/* (V1-V6) + (V2-V5) */

  /*********** Stage 3 ***********/

  /* Top */
  in[0] = vec_adds (v3, v2);		/* y0 = v3 + v2 */
  in[4] = vec_subs (v3, v2);		/* y4 = v3 - v2 */
  in[2] = vec_mradds (v8, mul2, v4);	/* y2 = v8 * a0 + v4 */
  v6 = vec_mradds (v4, mul2, mul6);	/* mul6 serves as the zero addend */
  in[6] = vec_subs (v6, v8);		/* y6 = v4 * a0 - v8 */

  /* Bottom */
  v6 = vec_mradds (v0, mul0, v5);	/* v6 = v0 * (c4) + v5 */
  v7 = vec_mradds (v0, mul4, v5);	/* v7 = v0 * (-c4) + v5 */
  v2 = vec_mradds (v1, mul4, v9);	/* v2 = v1 * (-c4) + v9 */
  v3 = vec_mradds (v1, mul0, v9);	/* v3 = v1 * (c4) + v9 */

  /*********** Stage 4 ***********/

  /* Bottom */
  in[1] = vec_mradds (v6, mul3, v3);	/* y1 = v6 * (a1) + v3 */
  v23 = vec_mradds (v3, mul3, mul6);
  in[7] = vec_subs (v23, v6);		/* y7 = v3 * (a1) - v6 */
  in[5] = vec_mradds (v2, mul1, v7);	/* y5 = v2 * (a2) + v7 */
  in[3] = vec_mradds (v7, mul5, v2);	/* y3 = v7 * (-a2) + v2 */

  transpose_vmx (in, out);

  /* Perform DCT on the eight rows */

  /*********** Stage 1 ***********/

  v8 = vec_adds (out[0], out[7]);
  v9 = vec_subs (out[0], out[7]);
  v0 = vec_adds (out[1], out[6]);
  v7 = vec_subs (out[1], out[6]);
  v1 = vec_adds (out[2], out[5]);
  v6 = vec_subs (out[2], out[5]);
  v2 = vec_adds (out[3], out[4]);
  v5 = vec_subs (out[3], out[4]);

  /*********** Stage 2 ***********/

  /* Top */
  v3 = vec_adds (v8, v2);		/* (V0+V7) + (V3+V4) */
  v4 = vec_subs (v8, v2);		/* (V0+V7) - (V3+V4) */
  v2 = vec_adds (v0, v1);		/* (V1+V6) + (V2+V5) */
  v8 = vec_subs (v0, v1);		/* (V1+V6) - (V2+V5) */

  /* Bottom */
  v0 = vec_subs (v7, v6);		/* (V1-V6) - (V2-V5) */
  v1 = vec_adds (v7, v6);		/* (V1-V6) + (V2-V5) */

  /*********** Stage 3 ***********/

  /* Top */
  /* NOTE(review): v25 is read here before any assignment; saturating
     x - x always yields 0, but formally this is an uninitialized read.
     Kept as-is -- this is a compile-only testsuite pattern.  */
  v25 = vec_subs (v25, v25);		/* reinit v25 = 0 */

  v20 = vec_adds (v3, v2);		/* y0 = v3 + v2 */
  v24 = vec_subs (v3, v2);		/* y4 = v3 - v2 */
  v22 = vec_mradds (v8, mul2, v4);	/* y2 = v8 * a0 + v4 */
  v6 = vec_mradds (v4, mul2, v25);
  v26 = vec_subs (v6, v8);		/* y6 = v4 * a0 - v8 */

  /* Bottom */
  v6 = vec_mradds (v0, mul0, v5);	/* v6 = v0 * (c4) + v5 */
  v7 = vec_mradds (v0, mul4, v5);	/* v7 = v0 * (-c4) + v5 */
  v2 = vec_mradds (v1, mul4, v9);	/* v2 = v1 * (-c4) + v9 */
  v3 = vec_mradds (v1, mul0, v9);	/* v3 = v1 * (c4) + v9 */

  /*********** Stage 4 ***********/

  /* Bottom */
  v21 = vec_mradds (v6, mul3, v3);	/* y1 = v6 * (a1) + v3 */
  v23 = vec_mradds (v3, mul3, v25);
  v27 = vec_subs (v23, v6);		/* y7 = v3 * (a1) - v6 */
  v25 = vec_mradds (v2, mul1, v7);	/* y5 = v2 * (a2) + v7 */
  v23 = vec_mradds (v7, mul5, v2);	/* y3 = v7 * (-a2) + v2 */

  /* Post-scale and store results */

  v31 = vec_subs (v31, v31);		/* reinit v31 = 0 (same idiom as v25) */

  output[0] = vec_mradds (postscale[0], v20, v31);
  output[2] = vec_mradds (postscale[2], v22, v31);
  output[4] = vec_mradds (postscale[4], v24, v31);
  output[6] = vec_mradds (postscale[6], v26, v31);
  output[1] = vec_mradds (postscale[1], v21, v31);
  output[3] = vec_mradds (postscale[3], v23, v31);
  output[5] = vec_mradds (postscale[5], v25, v31);
  output[7] = vec_mradds (postscale[7], v27, v31);
}