2014-04-15 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / testsuite / gcc.target / i386 / avx2-mpsadbw-2.c
blob18118e44202697d4b47621ea3ccfc7813069daf9
1 /* { dg-do run } */
2 /* { dg-require-effective-target avx2 } */
3 /* { dg-options "-O2 -mavx2" } */
5 #include <string.h>
6 #include "avx2-check.h"
/* Immediate control bytes handed to _mm256_mpsadbw_epu8 in avx2_test.
   The reference model compute_mpsadbw only looks at bits 0-5: bits 0-1
   and bit 2 steer the low 16-byte half, bits 3-4 and bit 5 steer the
   high half.  Several values also set bits 6-7, which the reference
   model never inspects.  */
8 #define msk0 0xC0
9 #define msk1 0x01
10 #define msk2 0xF2
11 #define msk3 0x03
12 #define msk4 0x84
13 #define msk5 0x05
14 #define msk6 0xE6
15 #define msk7 0x67
/* Scalar reference model for the 256-bit MPSADBW operation.

   I1 and I2 each point to 32 bytes of input (eight ints), treated as
   two independent 16-byte halves.  R receives 32 bytes of output:
   sixteen unsigned 16-bit sums, eight per half.

   For each half a 3-bit control field is taken from MASK (bits 0-2 for
   the low half, bits 3-5 for the high half):
     - bits 0-1 of the field pick one of the four 4-byte groups of I2
       within that half (the pattern);
     - bit 2 of the field makes the sliding window over I1 start at
       byte 0 or byte 4 of that half.
   Output word J of a half is the sum of absolute differences between
   the four I1 bytes starting at window offset J and the pattern.

   The sums are built in a local array and copied out with memcpy so
   that the caller's int array is never written through an
   unsigned short lvalue (which would violate strict aliasing at -O2).  */
static void
compute_mpsadbw (int *i1, int *i2, int mask, int *r)
{
  /* Byte views of the inputs; unsigned char may alias any object.  */
  const unsigned char *v1 = (const unsigned char *) i1;
  const unsigned char *v2 = (const unsigned char *) i2;
  unsigned short sad[16] = { 0 };
  int half, i, j;

  for (half = 0; half < 2; half++)
    {
      /* Control bits for this half: mask[2:0] (low) or mask[5:3] (high).  */
      int ctrl = (mask >> (3 * half)) & 7;
      int base = 16 * half;
      int offs2 = base + 4 * (ctrl & 3);
      int offs1 = base + 4 * ((ctrl >> 2) & 1);

      for (j = 0; j < 8; j++)
        for (i = 0; i < 4; i++)
          sad[8 * half + j] += abs (v1[offs1 + j + i] - v2[offs2 + i]);
    }

  memcpy (r, sad, sizeof sad);
}
51 static void
52 avx2_test (void)
54 union256i_d val1, val2, val3[8], res[8];
55 int tmp[8];
56 unsigned char masks[8];
57 int i, j;
59 val1.a[0] = 0x35251505;
60 val1.a[1] = 0x75655545;
61 val1.a[2] = 0xB5A59585;
62 val1.a[3] = 0xF5E5D5C5;
64 val1.a[4] = 0x35251505;
65 val1.a[5] = 0x75655545;
66 val1.a[6] = 0xB5A59585;
67 val1.a[7] = 0xF5E5D5C5;
69 val2.a[0] = 0x31211101;
70 val2.a[1] = 0x71615141;
71 val2.a[2] = 0xB1A19181;
72 val2.a[3] = 0xF1E1D1C1;
74 val2.a[4] = 0x31211101;
75 val2.a[5] = 0x71615141;
76 val2.a[6] = 0xB1A19181;
77 val2.a[7] = 0xF1E1D1C1;
79 for (i = 0; i < 8; i++)
80 switch (i % 3)
82 case 1:
83 val3[i].a[0] = 0xF1E1D1C1;
84 val3[i].a[1] = 0xB1A19181;
85 val3[i].a[2] = 0x71615141;
86 val3[i].a[3] = 0x31211101;
87 break;
88 default:
89 val3[i].x = val2.x;
90 break;
93 /* Check mpsadbw imm8, ymm, ymm. */
94 res[0].x = _mm256_mpsadbw_epu8 (val1.x, val2.x, msk0);
95 res[1].x = _mm256_mpsadbw_epu8 (val1.x, val2.x, msk1);
96 res[2].x = _mm256_mpsadbw_epu8 (val1.x, val2.x, msk2);
97 res[3].x = _mm256_mpsadbw_epu8 (val1.x, val2.x, msk3);
98 res[4].x = _mm256_mpsadbw_epu8 (val1.x, val2.x, msk4);
99 res[5].x = _mm256_mpsadbw_epu8 (val1.x, val2.x, msk5);
100 res[6].x = _mm256_mpsadbw_epu8 (val1.x, val2.x, msk6);
101 res[7].x = _mm256_mpsadbw_epu8 (val1.x, val2.x, msk7);
103 masks[0] = msk0;
104 masks[1] = msk1;
105 masks[2] = msk2;
106 masks[3] = msk3;
107 masks[4] = msk4;
108 masks[5] = msk5;
109 masks[6] = msk6;
110 masks[7] = msk7;
112 for (i = 0; i < 8; i++)
114 compute_mpsadbw (val1.a, val2.a, masks[i], tmp);
115 if (check_union256i_d (res[i], tmp))
116 abort ();
119 /* Check mpsadbw imm8, m256, ymm. */
120 for (i = 0; i < 8; i++)
122 res[i].x = _mm256_mpsadbw_epu8 (val1.x, val3[i].x, msk4);
123 masks[i] = msk4;
126 for (i = 0; i < 8; i++)
128 compute_mpsadbw (val1.a, val3[i].a, masks[i], tmp);
129 if (check_union256i_d (res[i], tmp))
130 abort ();