/* PR inline-asm/84742
   [official-gcc.git] / gcc / testsuite / gcc.c-torture / execute / 20060420-1.c
   blob 53393d17093d1586b4e5889aceb29d71c76f4774  */
extern void abort (void);

/* Vector of four floats (16 bytes), built with the GCC vector extension.  */
typedef float v4flt __attribute__ ((vector_size (16)));

/* Element-wise sum of A source rows into DST:

     dst[j] = src[0][j] + src[1][j] + ... + src[a - 1][j]   for 0 <= j < n.

   src[0] is read unconditionally, so with A <= 1 the first row is simply
   copied.

   The work is split into three phases:
     1. a scalar loop that runs until &dst[j] is aligned for v4flt,
     2. a vector loop that handles four vectors (4 * z floats) per iteration,
     3. a scalar loop for whatever is left.

   Only DST's alignment is tested.  The vector loop also reads src[i] + j
   through v4flt pointers, so every source row presumably has to share DST's
   alignment -- nothing in this function checks that.

   noinline keeps the function out of line.  */
void __attribute__ ((noinline))
foo (float *dst, float **src, int a, int n)
{
  int i, j;
  int z = sizeof (v4flt) / sizeof (float);	/* floats per vector */
  unsigned m = sizeof (v4flt) - 1;		/* low-bit alignment mask */

  /* Phase 1: scalar elements up to the first vector-aligned dst[j].  The
     test must use the address of dst[j]; adding the element index J to the
     byte address of DST would stop on an element that is still misaligned
     whenever DST itself is not vector-aligned.  */
  for (j = 0; j < n && ((unsigned long) (dst + j) & m); ++j)
    {
      float t = src[0][j];
      for (i = 1; i < a; ++i)
	t += src[i][j];
      dst[j] = t;
    }

  /* Phase 2: four vectors per iteration while at least 4 * z elements
     remain.  */
  for (; j < (n - (4 * z - 1)); j += 4 * z)
    {
      v4flt t0 = *(v4flt *) (src[0] + j + 0 * z);
      v4flt t1 = *(v4flt *) (src[0] + j + 1 * z);
      v4flt t2 = *(v4flt *) (src[0] + j + 2 * z);
      v4flt t3 = *(v4flt *) (src[0] + j + 3 * z);
      for (i = 1; i < a; ++i)
	{
	  t0 += *(v4flt *) (src[i] + j + 0 * z);
	  t1 += *(v4flt *) (src[i] + j + 1 * z);
	  t2 += *(v4flt *) (src[i] + j + 2 * z);
	  t3 += *(v4flt *) (src[i] + j + 3 * z);
	}
      *(v4flt *) (dst + j + 0 * z) = t0;
      *(v4flt *) (dst + j + 1 * z) = t1;
      *(v4flt *) (dst + j + 2 * z) = t2;
      *(v4flt *) (dst + j + 3 * z) = t3;
    }

  /* Phase 3: scalar tail.  */
  for (; j < n; ++j)
    {
      float t = src[0][j];
      for (i = 1; i < a; ++i)
	t += src[i][j];
      dst[j] = t;
    }
}
/* Backing storage from which main carves the destination row and the two
   source rows.  */
float buffer[64];

/* Fills two 16-element source rows, sums them with foo, and calls abort if
   any element of the result differs from the expected value.  Returns 0
   when every element matches.  */
int
main (void)
{
  int i;
  float *dst, *src[2];
  char *cptr;

  /* Round the start of buffer up to the next multiple of
     16 * sizeof (float) bytes; the three rows follow each other from
     there, 16 floats apart.  */
  cptr = (char *)buffer;
  cptr += (-(long int) buffer & (16 * sizeof (float) - 1));
  dst = (float *)cptr;
  src[0] = dst + 16;
  src[1] = dst + 32;
  for (i = 0; i < 16; ++i)
    {
      src[0][i] = (float) i + 11 * (float) i;
      src[1][i] = (float) i + 12 * (float) i;
    }
  foo (dst, src, 2, 16);
  for (i = 0; i < 16; ++i)
    {
      /* Same terms as the two initialisers above, added together.  */
      float e = (float) i + 11 * (float) i + (float) i + 12 * (float) i;
      if (dst[i] != e)
	abort ();
    }
  return 0;
}