1 /* { dg-require-effective-target vect_int } */
/* N: element count of the input, output and check_results arrays in main,
   and the bound of the loops in foo and main (foo and the reference loop
   run N / 3 times, three elements per iteration).  Computed from
   VECTOR_BITS, which is not defined in this listing.  */
17 #define N (VECTOR_BITS * 3 / 32 + 4)
/* foo: for each of N / 3 iterations, store three values through pOutput,
   advancing the pointer after every store.  Each value is a weighted sum
   of a, b and c using one row of the coefficients M00..M22, so every group
   of three outputs is a 3x3 coefficient set applied to the triple
   (a, b, c).  M00..M22 are not defined in this listing.

   Both pointer parameters are __restrict__-qualified.

   NOTE(review): no assignment to a, b or c and no read through pInput is
   visible here, and the braces of the function and loop are absent.
   Presumably the three loads from pInput sit on lines missing from this
   listing (the reference computation in main reads input[i * 3],
   input[i * 3 + 1] and input[i * 3 + 2]) -- confirm against the full
   file.  */
22 void foo (unsigned int *__restrict__ pInput
, unsigned int *__restrict__ pOutput
)
24 unsigned int i
, a
, b
, c
;
/* One iteration per group of three outputs.  */
26 for (i
= 0; i
< N
/ 3; i
++)
/* First output of the group: coefficients M00, M01, M02.  */
32 *pOutput
++ = M00
* a
+ M01
* b
+ M02
* c
;
/* Second output of the group: coefficients M10, M11, M12.  */
33 *pOutput
++ = M10
* a
+ M11
* b
+ M12
* c
;
/* Third output of the group: coefficients M20, M21, M22.  */
34 *pOutput
++ = M20
* a
+ M21
* b
+ M22
* c
;
/* main: test driver.  Declares input and output arrays of N unsigned
   ints, obtains the expected values in check_results (either a literal
   table or a scalar recomputation of the same weighted sums foo stores),
   and compares output[] with check_results[] element by element.

   NOTE(review): this listing shows no braces, no initialization of input
   or output, no call to foo, and no action for a failed comparison;
   presumably those are on lines missing here -- confirm against the full
   file.  */
38 int main (int argc
, const char* argv
[])
40 unsigned int input
[N
], output
[N
], i
;
/* Loop over all N elements; the only visible statement is an empty
   volatile asm.  NOTE(review): presumably the loop also fills input and
   output, and the asm keeps the compiler from transforming this setup
   loop -- confirm.  */
44 for (i
= 0; i
< N
; i
++)
48 __asm__
volatile ("");
/* Expected values as a literal table with 16 initializers.
   NOTE(review): check_results is declared twice in this listing;
   presumably this variant and the computed one below are alternatives
   selected by preprocessor conditionals that are not visible -- confirm.  */
52 unsigned int check_results
[N
] = {1470, 395, 28271, 5958, 1655, 111653, 10446, 2915, 195035, 14934, 4175, 278417, 19422, 5435, 361799, 0};
/* Expected values computed at run time: the array starts empty-initialized
   and is volatile-qualified.  */
54 volatile unsigned int check_results
[N
] = {};
/* Reference computation: one iteration per group of three elements, so
   only the first 3 * (N / 3) entries of check_results are written.  */
56 for (unsigned int i
= 0; i
< N
/ 3; i
++)
/* Read the three consecutive inputs of group i.  */
58 unsigned int a
= input
[i
* 3];
59 unsigned int b
= input
[i
* 3 + 1];
60 unsigned int c
= input
[i
* 3 + 2];
/* Same three weighted sums as foo, stored at the group's three slots.  */
62 check_results
[i
* 3] = M00
* a
+ M01
* b
+ M02
* c
;
63 check_results
[i
* 3 + 1] = M10
* a
+ M11
* b
+ M12
* c
;
64 check_results
[i
* 3 + 2] = M20
* a
+ M21
* b
+ M22
* c
;
/* Empty asm with a "memory" clobber.  NOTE(review): presumably a compiler
   barrier so the reference loop is not transformed -- confirm.  */
66 asm volatile ("" ::: "memory");
/* Verification: compare every element of output with the expected value.
   The statement executed on a mismatch is not visible in this listing.  */
72 for (i
= 0; i
< N
; i
++)
74 if (output
[i
] != check_results
[i
])
76 __asm__
volatile ("");
82 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
83 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm3_int && {! vect_load_lanes } } } } } */
84 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
85 /* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */
86 /* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
87 /* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */