1 /* { dg-require-effective-target vect_int } */
/* N is the element count of every array declared in main (input, output,
   input2, output2, check_results, check_results2) and the bound of the
   loops: main iterates N times, foo and the reference computation iterate
   N / 3 times.  VECTOR_BITS is not defined in the visible lines.  */
22 #define N (VECTOR_BITS * 3 / 32 + 4)
/* Kernel under test.  Takes four restrict-qualified int pointers: two
   inputs (pInput, pInput2) and two outputs (pOutput, pOutput2).

   NOTE(review): this view of the function is fragmentary.  The braces,
   the declarations of i, a, b, c, d, e and the statements that assign
   a..e are not among the visible lines.  The reference computation in
   main takes b, c from input[i * 3 + 1], input[i * 3 + 2] and d, e from
   input2[i * 2], input2[i * 2 + 1]; presumably foo loads them the same
   way through pInput / pInput2 -- confirm against the full file.  */
27 void foo (int *__restrict__ pInput
, int *__restrict__ pOutput
,
28 int *__restrict__ pInput2
, int *__restrict__ pOutput2
)
/* Loop runs while i < N / 3.  The visible statements below store three
   ints through pOutput and two ints through pOutput2, post-incrementing
   the pointer on every store.  */
32 for (i
= 0; i
< N
/ 3; i
++)
/* Three stores through pOutput.  Each is a linear combination of a, b
   and c, using the coefficients M00/M01/M02, then M10/M11/M12, then
   M20/M21/M22 (macros defined outside the visible lines).  */
41 *pOutput
++ = M00
* a
+ M01
* b
+ M02
* c
;
42 *pOutput
++ = M10
* a
+ M11
* b
+ M12
* c
;
43 *pOutput
++ = M20
* a
+ M21
* b
+ M22
* c
;
45 /* Regular SLP - no permutation required. */
/* Two stores through pOutput2: d scaled by K00, then e scaled by K10.  */
46 *pOutput2
++ = K00
* d
;
47 *pOutput2
++ = K10
* e
;
/* Test driver: builds the inputs, obtains the expected values, runs foo
   and compares both output arrays against the expected values.

   NOTE(review): this view is fragmentary.  Braces, the loop bodies that
   fill the arrays, the declaration of a, the tails of the two literal
   initializers and the statement executed on a mismatch are not among
   the visible lines.  */
51 int main (int argc
, const char* argv
[])
/* All working arrays hold N ints.  */
53 int input
[N
], output
[N
], i
;
54 int input2
[N
], output2
[N
];
/* Loop over all N elements.  Only the empty asm statement of its body is
   visible; presumably it keeps the compiler from vectorizing this setup
   loop -- confirm against the full file.  */
58 for (i
= 0; i
< N
; i
++)
64 __asm__
volatile ("");
/* First form of the expected values: literal tables.  check_results and
   check_results2 are declared again further down, so the two forms are
   presumably selected by a preprocessor conditional that is not visible
   here -- confirm against the full file.  */
68 int check_results
[N
] = { 1470, 395, 28271, 5958, 1655, 111653, 10446, 2915,
69 195035, 14934, 4175, 278417, 19422, 5435, 361799,
71 int check_results2
[N
] = { 0, 112, 810, 336, 1620, 560, 2430, 784, 3240, 1008,
/* Second form of the expected values: volatile arrays, initialized empty
   and then filled by the reference loop below.  */
74 volatile int check_results
[N
] = {};
75 volatile int check_results2
[N
] = {};
/* Reference computation, N / 3 iterations.  Iteration i reads three
   consecutive elements of input (offsets i * 3 + 1 and i * 3 + 2 are
   visible; the load of a is not) and two consecutive elements of input2
   (offsets i * 2 and i * 2 + 1).  */
77 for (int i
= 0; i
< N
/ 3; i
++)
80 int b
= input
[i
* 3 + 1];
81 int c
= input
[i
* 3 + 2];
82 int d
= input2
[i
* 2];
83 int e
= input2
[i
* 2 + 1];
/* Same expressions as the stores in foo, written with explicit array
   indices instead of post-incremented pointers.  */
85 check_results
[i
* 3] = M00
* a
+ M01
* b
+ M02
* c
;
86 check_results
[i
* 3 + 1] = M10
* a
+ M11
* b
+ M12
* c
;
87 check_results
[i
* 3 + 2] = M20
* a
+ M21
* b
+ M22
* c
;
89 check_results2
[i
* 2] = K00
* d
;
90 check_results2
[i
* 2 + 1] = K10
* e
;
/* Empty asm with a "memory" clobber: tells the compiler that memory may
   have been read or written here.  */
92 asm volatile ("" ::: "memory");
/* Run the kernel under test on the prepared arrays.  */
96 foo (input
, output
, input2
, output2
);
/* Verify all N elements.  The condition is true when either output array
   differs from its expected-value array at index i; the statement
   executed in that case is not visible here.  */
98 for (i
= 0; i
< N
; i
++)
99 if (output
[i
] != check_results
[i
] || output2
[i
] != check_results2
[i
])
105 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
106 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && { ! vect_load_lanes } } } } } */
107 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */
108 /* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */
109 /* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
110 /* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */