Use gather loads for strided accesses
[official-gcc.git] / gcc / testsuite / gcc.target / i386 / sse4_1-mpsadbw.c
blob 0fc24e86111685d6e35fb1f939d6dc6b4a540ba7
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.1" } */

/* Header included below.  Only defined here when nothing earlier has
   already defined CHECK_H, so an including file can substitute another
   header.  */
#ifndef CHECK_H
#define CHECK_H "sse4_1-check.h"
#endif

/* Name of the test body defined at the end of this file.  Likewise only
   defined here when not already provided.  */
#ifndef TEST
#define TEST sse4_1_test
#endif

#include CHECK_H

#include <smmintrin.h>
#include <stdlib.h>
#include <string.h>

/* Immediate operands handed to _mm_mpsadbw_epu8 in the test body.  The
   scalar reference compute_mpsadbw reads only the low three bits of each
   value: bits 0-1 choose the 4-byte group of the second operand and
   bit 2 chooses the starting byte (0 or 4) of the first operand.  The
   upper bits differ between masks but do not change the reference
   result.  */
#define msk0 0xC0
#define msk1 0x01
#define msk2 0xF2
#define msk3 0x03
#define msk4 0x84
#define msk5 0x05
#define msk6 0xE6
#define msk7 0x67

/* Scalar reference used to check _mm_mpsadbw_epu8.

   V1 and V2 each point to 16 bytes.  MASK is the immediate operand; only
   its low three bits are read:
     - bits 0-1 select which 4-byte group of V2 is the reference block;
     - bit 2 selects whether the sliding window over V1 starts at byte 0
       or byte 4.

   Returns eight 16-bit sums; element J is the sum over I = 0..3 of
   |V1[offs1 + J + I] - V2[offs2 + I]|.  The highest V1 index touched is
   4 + 7 + 3 = 14, so all reads stay within the 16 bytes.  */
static __m128i
compute_mpsadbw (const unsigned char *v1, const unsigned char *v2, int mask)
{
  union
    {
      __m128i x;
      unsigned short s[8];
    } ret;
  unsigned char s[4];
  int i, j;
  int offs1, offs2;

  /* Copy out the selected 4-byte reference block of V2.  */
  offs2 = 4 * (mask & 3);
  for (i = 0; i < 4; i++)
    s[i] = v2[offs2 + i];

  /* Slide a 4-byte window over V1, one byte per result element.  */
  offs1 = 4 * ((mask & 4) >> 2);
  for (j = 0; j < 8; j++)
    {
      ret.s[j] = 0;
      for (i = 0; i < 4; i++)
        ret.s[j] += abs (v1[offs1 + j + i] - s[i]);
    }

  return ret.x;
}

54 static void
55 TEST (void)
57 union
59 __m128i x;
60 unsigned int i[4];
61 unsigned char c[16];
62 } val1, val2, val3 [8];
63 __m128i res[8], tmp;
64 unsigned char masks[8];
65 int i;
67 val1.i[0] = 0x35251505;
68 val1.i[1] = 0x75655545;
69 val1.i[2] = 0xB5A59585;
70 val1.i[3] = 0xF5E5D5C5;
72 val2.i[0] = 0x31211101;
73 val2.i[1] = 0x71615141;
74 val2.i[2] = 0xB1A19181;
75 val2.i[3] = 0xF1E1D1C1;
77 for (i=0; i < 8; i++)
78 switch (i % 3)
80 case 1:
81 val3[i].i[0] = 0xF1E1D1C1;
82 val3[i].i[1] = 0xB1A19181;
83 val3[i].i[2] = 0x71615141;
84 val3[i].i[3] = 0x31211101;
85 break;
86 default:
87 val3[i].x = val2.x;
88 break;
91 /* Check mpsadbw imm8, xmm, xmm. */
92 res[0] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk0);
93 res[1] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk1);
94 res[2] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk2);
95 res[3] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk3);
96 res[4] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk4);
97 res[5] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk5);
98 res[6] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk6);
99 res[7] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk7);
101 masks[0] = msk0;
102 masks[1] = msk1;
103 masks[2] = msk2;
104 masks[3] = msk3;
105 masks[4] = msk4;
106 masks[5] = msk5;
107 masks[6] = msk6;
108 masks[7] = msk7;
110 for (i=0; i < 8; i++)
112 tmp = compute_mpsadbw (val1.c, val2.c, masks[i]);
113 if (memcmp (&tmp, &res[i], sizeof (tmp)))
114 abort ();
117 /* Check mpsadbw imm8, m128, xmm. */
118 for (i=0; i < 8; i++)
120 res[i] = _mm_mpsadbw_epu8 (val1.x, val3[i].x, msk4);
121 masks[i] = msk4;
124 for (i=0; i < 8; i++)
126 tmp = compute_mpsadbw (val1.c, val3[i].c, masks[i]);
127 if (memcmp (&tmp, &res[i], sizeof (tmp)))
128 abort ();