This commit was manufactured by cvs2svn to create branch
[official-gcc.git] / gcc / testsuite / gcc.dg / i386-sse-6.c
blob6642891485bf420375409e32ad810700c11b32be
1 /* { dg-do run { target i?86-*-* x86_64-*-* } } */
2 /* { dg-options "-O2 -msse2" } */
3 #include <xmmintrin.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include "i386-cpuid.h"
/* Attribute that forbids inlining of the function it is applied to.
   A definition supplied before this point takes precedence.  */
#if !defined (NOINLINE)
# define NOINLINE __attribute__ ((noinline))
#endif

/* Shift count shared by all shift tests: used directly as the immediate
   operand and, through s128, as the vector count operand.  For the
   *_si128 forms it is a count of bytes.  */
#define SHIFT (4)
15 typedef union {
16 __m128i v;
17 unsigned int s[4];
18 unsigned short int t[8];
19 unsigned long long u[2];
20 unsigned char c[16];
21 }vecInLong;
23 void sse2_tests (void) NOINLINE;
24 void dump128_16 (char *, char *, vecInLong);
25 void dump128_32 (char *, char *, vecInLong);
26 void dump128_64 (char *, char *, vecInLong);
27 void dump128_128 (char *, char *, vecInLong);
28 int check (const char *, const char *[]);
30 char buf[8000];
31 char comparison[8000];
32 static int errors = 0;
34 vecInLong a128, b128, c128, d128, e128, f128;
35 __m128i m128_16, m128_32, s128, m128_64, m128_128;
36 __m64 m64_16, s64, m64_32, m64_64;
/* Expected output of sse2_tests (), one string per dump128_* call and in
   the same order as the calls.  Each string must match the generated line
   byte for byte, including the label spelling, the trailing space after
   every lane printed by dump128_16/32/64, and the newline.

   check () walks this table until it reaches an empty string, so the
   final "" entry is a required sentinel and must stay last.  */
const char *reference_sse2[] = {
  "_mm_srai_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
  "_mm_sra_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
  "_mm_srai_epi32 00123456 00123456 00123456 00123456 \n",
  "_mm_sra_epi32 00123456 00123456 00123456 00123456 \n",
  "_mm_srli_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
  "_mm_srl_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
  "_mm_srli_epi32 00123456 00123456 00123456 00123456 \n",
  "_mm_srl_epi32 00123456 00123456 00123456 00123456 \n",
  "_mm_srli_epi64 00123456789abcde 00123456789abcde \n",
  "_mm_srl_epi64 00123456789abcde 00123456789abcde \n",
  "_mm_srli_si128 (byte shift) 00000000ffeeddccbbaa998877665544\n",
  "_mm_slli_epi16 1230 1230 1230 1230 1230 1230 1230 1230 \n",
  "_mm_sll_epi16 1230 1230 1230 1230 1230 1230 1230 1230 \n",
  "_mm_slli_epi32 12345670 12345670 12345670 12345670 \n",
  "_mm_sll_epi32 12345670 12345670 12345670 12345670 \n",
  "_mm_slli_epi64 123456789abcdef0 123456789abcdef0 \n",
  "_mm_sll_epi64 123456789abcdef0 123456789abcdef0 \n",
  "_mm_sll_si128 (byte shift) bbaa9988776655443322110000000000\n",
  "_mm_shuffle_epi32 ffeeddcc bbaa9988 77665544 33221100 \n",
  "_mm_shuffelo_epi16 7766 5544 3322 1100 9988 bbaa ddcc ffee \n",
  "_mm_shuffehi_epi16 1100 3322 5544 7766 ffee ddcc bbaa 9988 \n",
  ""
};
63 int main()
65 unsigned long cpu_facilities;
67 cpu_facilities = i386_cpuid ();
69 if ((cpu_facilities & (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
70 != (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
71 /* If host has no vector support, pass. */
72 exit (0);
74 a128.s[0] = 0x01234567;
75 a128.s[1] = 0x01234567;
76 a128.s[2] = 0x01234567;
77 a128.s[3] = 0x01234567;
79 m128_32 = a128.v;
81 d128.u[0] = 0x0123456789abcdefULL;
82 d128.u[1] = 0x0123456789abcdefULL;
84 m128_64 = d128.v;
86 /* This is the 128-bit constant 0x00112233445566778899aabbccddeeff,
87 expressed as two little-endian 64-bit words. */
88 e128.u[0] = 0x7766554433221100ULL;
89 e128.u[1] = 0xffeeddccbbaa9988ULL;
91 f128.t[0] = 0x0123;
92 f128.t[1] = 0x0123;
93 f128.t[2] = 0x0123;
94 f128.t[3] = 0x0123;
95 f128.t[4] = 0x0123;
96 f128.t[5] = 0x0123;
97 f128.t[6] = 0x0123;
98 f128.t[7] = 0x0123;
100 m128_16 = f128.v;
102 m128_128 = e128.v;
104 b128.s[0] = SHIFT;
105 b128.s[1] = 0;
106 b128.s[2] = 0;
107 b128.s[3] = 0;
109 s128 = b128.v;
111 if (cpu_facilities & bit_SSE2)
113 sse2_tests();
114 check (buf, reference_sse2);
115 #ifdef DEBUG
116 printf ("sse2 testing:\n");
117 printf (buf);
118 printf ("\ncomparison:\n");
119 printf (comparison);
120 #endif
121 buf[0] = '\0';
124 if (errors != 0)
125 abort ();
126 exit (0);
129 void NOINLINE
130 sse2_tests (void)
132 /* psraw */
133 c128.v = _mm_srai_epi16 (m128_16, SHIFT);
134 dump128_16 (buf, "_mm_srai_epi16", c128);
135 c128.v = _mm_sra_epi16 (m128_16, s128);
136 dump128_16 (buf, "_mm_sra_epi16", c128);
138 /* psrad */
139 c128.v = _mm_srai_epi32 (m128_32, SHIFT);
140 dump128_32 (buf, "_mm_srai_epi32", c128);
141 c128.v = _mm_sra_epi32 (m128_32, s128);
142 dump128_32 (buf, "_mm_sra_epi32", c128);
144 /* psrlw */
145 c128.v = _mm_srli_epi16 (m128_16, SHIFT);
146 dump128_16 (buf, "_mm_srli_epi16", c128);
147 c128.v = _mm_srl_epi16 (m128_16, s128);
148 dump128_16 (buf, "_mm_srl_epi16", c128);
150 /* psrld */
151 c128.v = _mm_srli_epi32 (m128_32, SHIFT);
152 dump128_32 (buf, "_mm_srli_epi32", c128);
153 c128.v = _mm_srl_epi32 (m128_32, s128);
154 dump128_32 (buf, "_mm_srl_epi32", c128);
156 /* psrlq */
157 c128.v = _mm_srli_epi64 (m128_64, SHIFT);
158 dump128_64 (buf, "_mm_srli_epi64", c128);
159 c128.v = _mm_srl_epi64 (m128_64, s128);
160 dump128_64 (buf, "_mm_srl_epi64", c128);
162 /* psrldq */
163 c128.v = _mm_srli_si128 (m128_128, SHIFT);
164 dump128_128 (buf, "_mm_srli_si128 (byte shift) ", c128);
166 /* psllw */
167 c128.v = _mm_slli_epi16 (m128_16, SHIFT);
168 dump128_16 (buf, "_mm_slli_epi16", c128);
169 c128.v = _mm_sll_epi16 (m128_16, s128);
170 dump128_16 (buf, "_mm_sll_epi16", c128);
172 /* pslld */
173 c128.v = _mm_slli_epi32 (m128_32, SHIFT);
174 dump128_32 (buf, "_mm_slli_epi32", c128);
175 c128.v = _mm_sll_epi32 (m128_32, s128);
176 dump128_32 (buf, "_mm_sll_epi32", c128);
178 /* psllq */
179 c128.v = _mm_slli_epi64 (m128_64, SHIFT);
180 dump128_64 (buf, "_mm_slli_epi64", c128);
181 c128.v = _mm_sll_epi64 (m128_64, s128);
182 dump128_64 (buf, "_mm_sll_epi64", c128);
184 /* pslldq */
185 c128.v = _mm_slli_si128 (m128_128, SHIFT);
186 dump128_128 (buf, "_mm_sll_si128 (byte shift)", c128);
188 /* Shuffle constant 0x1b == 0b_00_01_10_11, e.g. swap words: ABCD => DCBA. */
190 /* pshufd */
191 c128.v = _mm_shuffle_epi32 (m128_128, 0x1b);
192 dump128_32 (buf, "_mm_shuffle_epi32", c128);
194 /* pshuflw */
195 c128.v = _mm_shufflelo_epi16 (m128_128, 0x1b);
196 dump128_16 (buf, "_mm_shuffelo_epi16", c128);
198 /* pshufhw */
199 c128.v = _mm_shufflehi_epi16 (m128_128, 0x1b);
200 dump128_16 (buf, "_mm_shuffehi_epi16", c128);
203 void
204 dump128_16 (char *buf, char *name, vecInLong x)
206 int i;
207 char *p = buf + strlen (buf);
209 sprintf (p, "%s ", name);
210 p += strlen (p);
212 for (i=0; i<8; i++)
214 sprintf (p, "%4.4x ", x.t[i]);
215 p += strlen (p);
217 strcat (p, "\n");
220 void
221 dump128_32 (char *buf, char *name, vecInLong x)
223 int i;
224 char *p = buf + strlen (buf);
226 sprintf (p, "%s ", name);
227 p += strlen (p);
229 for (i=0; i<4; i++)
231 sprintf (p, "%8.8x ", x.s[i]);
232 p += strlen (p);
234 strcat (p, "\n");
237 void
238 dump128_64 (char *buf, char *name, vecInLong x)
240 int i;
241 char *p = buf + strlen (buf);
243 sprintf (p, "%s ", name);
244 p += strlen (p);
246 for (i=0; i<2; i++)
248 sprintf (p, "%16.16llx ", x.u[i]);
249 p += strlen (p);
251 strcat (p, "\n");
254 void
255 dump128_128 (char *buf, char *name, vecInLong x)
257 int i;
258 char *p = buf + strlen (buf);
260 sprintf (p, "%s ", name);
261 p += strlen (p);
263 for (i=15; i>=0; i--)
265 /* This is cheating; we don't have a 128-bit int format code.
266 Running the loop backwards to compensate for the
267 little-endian layout. */
268 sprintf (p, "%2.2x", x.c[i]);
269 p += strlen (p);
271 strcat (p, "\n");
275 check (const char *input, const char *reference[])
277 int broken, i, j, len;
278 const char *p_input;
279 char *p_comparison;
280 int new_errors = 0;
282 p_comparison = &comparison[0];
283 p_input = input;
285 for (i = 0; *reference[i] != '\0'; i++)
287 broken = 0;
288 len = strlen (reference[i]);
289 for (j = 0; j < len; j++)
291 /* Ignore the terminating NUL characters at the end of every string in 'reference[]'. */
292 if (!broken && *p_input != reference[i][j])
294 *p_comparison = '\0';
295 strcat (p_comparison, " >>> ");
296 p_comparison += strlen (p_comparison);
297 new_errors++;
298 broken = 1;
300 *p_comparison = *p_input;
301 p_comparison++;
302 p_input++;
304 if (broken)
306 *p_comparison = '\0';
307 strcat (p_comparison, "expected:\n");
308 strcat (p_comparison, reference[i]);
309 p_comparison += strlen (p_comparison);
312 *p_comparison = '\0';
313 strcat (p_comparison, new_errors ? "failure\n\n" : "O.K.\n\n") ;
314 errors += new_errors;
315 return 0;