1 // { dg-do compile { target { i?86-*-* x86_64-*-* } && { ! *-*-solaris* } } }
2 // { dg-options "-Ofast -funroll-loops -fopenmp -march=knl" }
3 // { dg-final { scan-assembler-not "vmovaps" } }
// Function-like macro that yields the larger of its two arguments
// (yields b when the operands compare equal or a > b is false).
// Both arguments are fully parenthesized in the expansion, but the
// selected argument is expanded twice, so an argument with side effects
// may be evaluated more than once.
9 #define max(a, b) ( (a) > (b) ? (a) : (b) )
// Sizes defined in another translation unit. In the code visible here,
// N1 is the element count of each scratch buffer allocated in foo, N2 is
// the upper bound of the inner index loops, and N3 is the number of outer
// iterations starting at idx.
// NOTE(review): the indexing pattern suggests N1 must be at least N2 + 1;
// nothing visible enforces this -- confirm.
18 extern int N1, N2, N3;
// Weighted combination of two values: p * up + (1.0f - p) * down.
// Every argument is parenthesized in the expansion; p is expanded twice,
// so it should be free of side effects.
20 #define func(p, up, down) ((p)*(up) + (1.0f-(p)) * (down))
22 void foo (Sdata *in, int idx, float *out)
24 float* y1 = (float*)_mm_malloc(sizeof(float) * N1,16);
25 float* y2 = (float*)_mm_malloc(sizeof(float) * N1,16);
26 float* y3 = (float*)_mm_malloc(sizeof(float) * N1,16);
27 float* y4 = (float*)_mm_malloc(sizeof(float) * N1,16);
29 for (int k = idx; k < idx + N3; k++) {
36 float u = exp(x4 * sqrt(x5));
37 float d = exp(-x4 * sqrt(x5));
38 float a = exp(x1 * x5);
39 float m = exp(-x1 * x5);
40 float p = (a - d) / (u - d);
43 for (int i = 1; i <= N2; i++) {
44 y2[i] = u * y2[i - 1];
45 y3[i] = d * y3[i - 1];
48 for (int i = 0; i <= N2; i++) {
50 max((x3 - y2[N2 - i] * y3[i]), float(0.f));
52 for (int i = N2 - 1; i >= 0; i--) {
54 for (int j = 0; j <= i; j++) {
55 y4[j] = func(p,y1[j],y1[j+1]) * m;
58 for (int j = 0; j <= i; j++) {
59 float t1 = y2[i - j] * y3[j];
60 float t2 = max(x3 - t1, float(0.f));
61 y1[j] = max(t2, y4[j]);