1 // Test sequences that can use RISBG with a zeroed first operand.
2 // The tests here assume that RISBLG isn't available.
/* Tests ported from the LLVM testsuite.  */
/* NOTE(review): each line below still carries a fused line number from the
   original listing (extraction artifact) — left untouched here.  */
6 /* { dg-do compile { target s390x-*-* } } */
7 /* { dg-options "-O3 -march=z10 -mzarch -fno-asynchronous-unwind-tables" } */
/* Shorthand type names used by all of the test functions in this file.  */
9 #define i64 signed long long
10 #define ui64 unsigned long long
11 #define i32 signed int
12 #define ui32 unsigned int
13 #define i8 signed char
14 #define ui8 unsigned char
/* NOTE(review): the enclosing function definitions (f1-f6 headers, braces and
   return statements) are missing from this excerpt — the embedded numbering
   skips exactly those lines — so only the body fragments are visible; they are
   kept verbatim because the scan-assembler patterns depend on them.  */
16 // Test an extraction of bit 0 from a right-shifted value.
19 /* { dg-final { scan-assembler "f1:\n\trisbg\t%r2,%r2,64-1,128\\\+63,53\\\+1" } } */
20 i32 v_shr
= ((ui32
)v_foo
) >> 10;
21 i32 v_and
= v_shr
& 1;
25 // ...and again with i64.
28 /* { dg-final { scan-assembler "f2:\n\trisbg\t%r2,%r2,64-1,128\\\+63,53\\\+1" { target { lp64 } } } } */
29 /* { dg-final { scan-assembler "f2:\n\trisbg\t%r3,%r3,64-1,128\\\+63,53\\\+1\n\tlhi\t%r2,0" { target { ! lp64 } } } } */
30 i64 v_shr
= ((ui64
)v_foo
) >> 10;
31 i64 v_and
= v_shr
& 1;
35 // Test an extraction of other bits from a right-shifted value.
38 /* { dg-final { scan-assembler "f3:\n\trisbg\t%r2,%r2,60,128\\\+61,64-22" } } */
39 i32 v_shr
= ((ui32
)v_foo
) >> 22;
40 i32 v_and
= v_shr
& 12;
44 // ...and again with i64.
47 /* { dg-final { scan-assembler "f4:\n\trisbg\t%r2,%r2,60,128\\\+61,64-22" { target { lp64 } } } } */
48 /* { dg-final { scan-assembler "f4:\n\trisbg\t%r3,%r3,60,128\\\+61,64-22\n\tlhi\t%r2,0" { target { ! lp64 } } } } */
49 i64 v_shr
= ((ui64
)v_foo
) >> 22;
50 i64 v_and
= v_shr
& 12;
54 // Test an extraction of most bits from a right-shifted value.
55 // The range should be reduced to exclude the zeroed high bits.
58 /* { dg-final { scan-assembler "f5:\n\trisbg\t%r2,%r2,34,128\\\+60,64-2" } } */
59 i32 v_shr
= ((ui32
)v_foo
) >> 2;
60 i32 v_and
= v_shr
& -8;
64 // ...and again with i64.
67 /* { dg-final { scan-assembler "f6:\n\trisbg\t%r2,%r2,2,128\\\+60,64-2" { target { lp64 } } } } */
68 /* { dg-final { scan-assembler "f6:\n\trisbg\t%r3,%r2,0,0\\\+32-1,64-0-32\n\trisbg\t%r2,%r3,2,128\\\+60,64-2" { target { ! lp64 } } } } */
69 i64 v_shr
= ((ui64
)v_foo
) >> 2;
70 i64 v_and
= v_shr
& -8;
/* Should be
   { dg-final { scan-assembler "f7:\n\tsrl\t%r2,2\n\tnill\t%r2,65529" { xfail { lp64 } } } }
   but because a zeroextend is merged into the pattern it is actually
   { dg-final { scan-assembler "f7:\n\tsrl\t%r2,2\n\tlgfi\t%r1,1073741817\n\tngr\t%r2,%r1" { target { lp64 } } } }
   { dg-final { scan-assembler "f7:\n\tsrl\t%r2,2\n\tnill\t%r2,65529" { target { ! lp64 } } } } */
/* NOTE(review): function headers, braces and returns for f7-f16 were dropped
   from this excerpt (embedded numbering skips those lines); the statement
   fragments are kept verbatim — the scan-assembler patterns depend on them.  */
83 i32 v_shr
= ((ui32
)v_foo
) >> 2;
84 i32 v_and
= v_shr
& -7;
88 // ...and again with i64.
91 /* { dg-final { scan-assembler "f8:\n\tsrlg\t%r2,%r2,2\n\tnill\t%r2,65529" { target { lp64 } } } } */
92 /* { dg-final { scan-assembler "f8:\n\trisbg\t%r3,%r2,0,0\\\+32-1,64-0-32\n\tsrlg\t%r2,%r3,2\n\tnill\t%r2,65529" { target { ! lp64 } } } } */
93 i64 v_shr
= ((ui64
)v_foo
) >> 2;
94 i64 v_and
= v_shr
& -7;
98 // Test an extraction of bits from a left-shifted value. The range should
99 // be reduced to exclude the zeroed low bits.
102 /* { dg-final { scan-assembler "f9:\n\trisbg\t%r2,%r2,56,128\\\+61,2" } } */
103 i32 v_shr
= v_foo
<< 2;
104 i32 v_and
= v_shr
& 255;
108 // ...and again with i64.
111 /* { dg-final { scan-assembler "f10:\n\trisbg\t%r2,%r2,56,128\\\+61,2" { target { lp64 } } } } */
112 /* { dg-final { scan-assembler "f10:\n\trisbg\t%r3,%r3,56,128\\\+61,2\n\tlhi\t%r2,0" { target { ! lp64 } } } } */
113 i64 v_shr
= v_foo
<< 2;
114 i64 v_and
= v_shr
& 255;
118 // Try a wrap-around mask (mask ....111100001111). This needs a separate shift
122 /* { dg-final { scan-assembler "f11:\n\tsll\t%r2,2\n\tnill\t%r2,65295" } } */
123 i32 v_shr
= v_foo
<< 2;
124 i32 v_and
= v_shr
& -241;
128 // ...and again with i64.
131 /* { dg-final { scan-assembler "f12:\n\tsllg\t%r2,%r2,2\n\tnill\t%r2,65295" { target { lp64 } } } } */
132 /* { dg-final { scan-assembler "f12:\n\trisbg\t%r3,%r2,0,0\\\+32-1,64-0-32\n\tsllg\t%r2,%r3,2\n\tnill\t%r2,65295" { target { ! lp64 } } } } */
133 i64 v_shr
= v_foo
<< 2;
134 i64 v_and
= v_shr
& -241;
138 // Test an extraction from a rotated value, no mask wraparound.
139 // This is equivalent to the lshr case, because the bits from the
143 /* { dg-final { scan-assembler "f13:\n\trisbg\t%r2,%r2,56,128\\\+60,32\\\+14" { target { lp64 } } } } */
144 /* { dg-final { scan-assembler "f13:\n\trll\t%r2,%r2,14\n\tnilf\t%r2,248" { target { ! lp64 } } } } */
145 i32 v_parta
= v_foo
<< 14;
146 i32 v_partb
= ((ui32
)v_foo
) >> 18;
147 i32 v_rotl
= v_parta
| v_partb
;
148 i32 v_and
= v_rotl
& 248;
152 // ...and again with i64.
155 /* { dg-final { scan-assembler "f14:\n\trisbg\t%r2,%r2,56,128\\\+60,14" { target { lp64 } } } } */
156 /* { dg-final { scan-assembler "f14:\n\trisbg\t%r3,%r2,56,128\\\+60,46\n\tlhi\t%r2,0" { target { ! lp64 } } } } */
157 i64 v_parta
= v_foo
<< 14;
158 i64 v_partb
= ((ui64
)v_foo
) >> 50;
159 i64 v_rotl
= v_parta
| v_partb
;
160 i64 v_and
= v_rotl
& 248;
164 // Try a case in which only the bits from the shl are used.
167 /* { dg-final { scan-assembler "f15:\n\trisbg\t%r2,%r2,47,128\\\+49,14" { target { lp64 } } } } */
168 /* { dg-final { scan-assembler "f15:\n\trll\t%r2,%r2,14\n\tnilf\t%r2,114688" { target { ! lp64 } } } } */
169 i32 v_parta
= v_foo
<< 14;
170 i32 v_partb
= ((ui32
)v_foo
) >> 18;
171 i32 v_rotl
= v_parta
| v_partb
;
172 i32 v_and
= v_rotl
& 114688;
176 // ...and again with i64.
179 /* { dg-final { scan-assembler "f16:\n\trisbg\t%r2,%r2,47,128\\\+49,14" { target { lp64 } } } } */
180 /* { dg-final { scan-assembler "f16:\n\trisbg\t%r3,%r3,47,128\\\+49,14\n\tlhi\t%r2,0" { target { ! lp64 } } } } */
181 i64 v_parta
= v_foo
<< 14;
182 i64 v_partb
= ((ui64
)v_foo
) >> 50;
183 i64 v_rotl
= v_parta
| v_partb
;
184 i64 v_and
= v_rotl
& 114688;
188 // Test a 32-bit rotate in which both parts of the OR are needed.
189 // This needs a separate shift and mask.
/* Should be
   { dg-final { scan-assembler "f17:\n\trll\t%r2,%r2,4\n\tnilf\t%r2,126" { xfail { lp64 } } } }
   but because a zeroextend is merged into the pattern it is actually
   { dg-final { scan-assembler "f17:\n\trll\t%r2,%r2,4\n\trisbg\t%r2,%r2,57,128\\\+62,0" { target { lp64 } } } }
   { dg-final { scan-assembler "f17:\n\trll\t%r2,%r2,4\n\tnilf\t%r2,126" { target { ! lp64 } } } } */
/* NOTE(review): headers/braces/returns for f17 and f18 are not visible in
   this excerpt; fragments kept verbatim (asm-scan patterns depend on them).  */
197 i32 v_parta
= v_foo
<< 4;
198 i32 v_partb
= ((ui32
)v_foo
) >> 28;
199 i32 v_rotl
= v_parta
| v_partb
;
200 i32 v_and
= v_rotl
& 126;
204 // ...and for i64, where RISBG should do the rotate too.
207 /* { dg-final { scan-assembler "f18:\n\trisbg\t%r2,%r2,57,128\\\+62,4" { target { lp64 } } } } */
208 /* { dg-final { scan-assembler "f18:\n\trisbg\t%r3,%r2,0,0\\\+32-1,64-0-32\n\tlhi\t%r2,0\n\trisbg\t%r3,%r3,57,128\\\+62,4" { target { ! lp64 } } } } */
209 i64 v_parta
= v_foo
<< 4;
210 i64 v_partb
= ((ui64
)v_foo
) >> 60;
211 i64 v_rotl
= v_parta
| v_partb
;
212 i64 v_and
= v_rotl
& 126;
216 // Test an arithmetic shift right in which some of the sign bits are kept.
217 // This needs a separate shift and mask.
/* Should be
   { dg-final { scan-assembler "f19:\n\tsra\t%r2,28\n\tnilf\t%r2,30" { xfail { lp64 } } } }
   but because a zeroextend is merged into the pattern it is actually
   { dg-final { scan-assembler "f19:\n\tsra\t%r2,28\n\trisbg\t%r2,%r2,59,128\\\+62,0" { target { lp64 } } } }
   { dg-final { scan-assembler "f19:\n\tsra\t%r2,28\n\tnilf\t%r2,30" { target { ! lp64 } } } } */
/* NOTE(review): headers/braces/returns for f19 and f20 are not visible in
   this excerpt; fragments kept verbatim (asm-scan patterns depend on them).  */
225 i32 v_shr
= v_foo
>> 28;
226 i32 v_and
= v_shr
& 30;
230 // ...and again with i64. In this case RISBG is the best way of doing the AND.
233 /* { dg-final { scan-assembler "f20:\n\tsrag\t%r2,%r2,60\n\trisbg\t%r2,%r2,59,128\\\+62,0" { target { lp64 } } } } */
234 /* { dg-final { scan-assembler "f20:\n\trisbg\t%r3,%r2,0,0\\\+32-1,64-0-32\n\tlhi\t%r2,0\n\tsrag\t%r3,%r3,60\n\tnilf\t%r3,30" { target { ! lp64 } } } } */
235 i64 v_shr
= v_foo
>> 60;
236 i64 v_and
= v_shr
& 30;
// Now try an arithmetic right shift in which the sign bits aren't needed.
// Note: unlike LLVM, GCC replaces the ashrt with a lshrt in any case, using
// a risbg pattern without ashrt.
/* NOTE(review): headers/braces/returns for f21-f27 are not visible in this
   excerpt; statement fragments kept verbatim (asm-scan patterns depend on
   them).  */
245 /* { dg-final { scan-assembler "f21:\n\trisbg\t%r2,%r2,60,128\\\+62,64-28" } } */
246 i32 v_shr
= v_foo
>> 28;
247 i32 v_and
= v_shr
& 14;
251 // ...and again with i64.
254 /* { dg-final { scan-assembler "f22:\n\trisbg\t%r2,%r2,60,128\\\+62,64-60" { target { lp64 } } } } */
255 /* { dg-final { scan-assembler "f22:\n\trisbg\t%r3,%r2,60,128\\\+62,64-28\n\tlhi\t%r2,0" { target { ! lp64 } } } } */
256 i64 v_shr
= v_foo
>> 60;
257 i64 v_and
= v_shr
& 14;
261 // Check that we use RISBG for shifted values even if the AND is a
262 // natural zero extension.
265 /* { dg-final { scan-assembler "f23:\n\trisbg\t%r2,%r2,64-8,128\\\+63,54\\\+8" { target { lp64 } } } } */
266 /* { dg-final { scan-assembler "f23:\n\trisbg\t%r3,%r3,64-8,128\\\+63,54\\\+8\n\tlhi\t%r2,0" { target { ! lp64 } } } } */
267 i64 v_shr
= ((ui64
)v_foo
) >> 2;
268 i64 v_and
= v_shr
& 255;
272 // Test a case where the AND comes before a rotate. This needs a separate
276 /* { dg-final { scan-assembler "f24:\n\tnilf\t%r2,254\n\trll\t%r2,%r2,29" } } */
277 i32 v_and
= v_foo
& 254;
278 i32 v_parta
= ((ui32
)v_and
) >> 3;
279 i32 v_partb
= v_and
<< 29;
280 i32 v_rotl
= v_parta
| v_partb
;
284 // ...and again with i64, where a single RISBG is enough.
287 /* { dg-final { scan-assembler "f25:\n\trisbg\t%r2,%r2,57,128\\\+59,3" { target { lp64 } } } } */
288 /* { dg-final { scan-assembler "f25:\n\trisbg\t%r3,%r3,57,128\\\+59,3\n\tlhi\t%r2,0" { target { ! lp64 } } } } */
289 i64 v_and
= v_foo
& 14;
290 i64 v_parta
= v_and
<< 3;
291 i64 v_partb
= ((ui64
)v_and
) >> 61;
292 i64 v_rotl
= v_parta
| v_partb
;
296 // Test a wrap-around case in which the AND comes before a rotate.
297 // This again needs a separate mask and rotate.
300 /* { dg-final { scan-assembler "f26:\n\tnill\t%r2,65487\n\trll\t%r2,%r2,5" } } */
301 i32 v_and
= v_foo
& -49;
302 i32 v_parta
= v_and
<< 5;
303 i32 v_partb
= ((ui32
)v_and
) >> 27;
304 i32 v_rotl
= v_parta
| v_partb
;
308 // ...and again with i64, where a single RISBG is OK.
311 /* { dg-final { scan-assembler "f27:\n\trisbg\t%r2,%r2,55,128\\\+52,5" { target { lp64 } } } } */
312 /* { dg-final { scan-assembler "f27:\n\trisbg\t%r3,%r2,0,0\\\+32-1,64-0-32\n\trisbg\t%r2,%r3,55,128\\\+52,5" { target { ! lp64 } } } } */
313 i64 v_and
= v_foo
& -49;
314 i64 v_parta
= v_and
<< 5;
315 i64 v_partb
= ((ui64
)v_and
) >> 59;
316 i64 v_rotl
= v_parta
| v_partb
;
/* NOTE(review): headers/braces/returns for f28-f38 are not visible in this
   excerpt; statement fragments kept verbatim (asm-scan patterns depend on
   them).  */
320 // Test a case where the AND comes before a shift left.
323 /* { dg-final { scan-assembler "f28:\n\trisbg\t%r2,%r2,32,128\\\+45,17" } } */
324 i32 v_and
= v_foo
& 32766;
325 i32 v_shl
= v_and
<< 17;
329 // ...and again with i64.
332 /* { dg-final { scan-assembler "f29:\n\trisbg\t%r2,%r2,0,128\\\+13,49" { target { lp64 } } } } */
333 /* { dg-final { scan-assembler "f29:\n\trisbg\t%r\[23\],%r3,0,128\\\+13,49\n\tlr\t%r\[23\],%r\[32\]\n\tsrlg\t%r2,%r2" { target { ! lp64 } } } } */
334 i64 v_and
= v_foo
& 32766;
335 i64 v_shl
= v_and
<< 49;
339 // Test the next shift up from f28, in which the mask should get shortened.
342 /* { dg-final { scan-assembler "f30:\n\trisbg\t%r2,%r2,32,128\\\+44,18" } } */
343 i32 v_and
= v_foo
& 32766;
344 i32 v_shl
= v_and
<< 18;
348 // ...and again with i64.
351 /* { dg-final { scan-assembler "f31:\n\trisbg\t%r2,%r2,0,128\\\+12,50" { target { lp64 } } } } */
352 /* { dg-final { scan-assembler "f31:\n\trisbg\t%r\[23\],%r3,0,128\\\+12,50\n\tlr\t%r\[23\],%r\[32\]\n\tsrlg\t%r2,%r2" { target { ! lp64 } } } } */
353 i64 v_and
= v_foo
& 32766;
354 i64 v_shl
= v_and
<< 50;
358 // Test a wrap-around case in which the shift left comes after the AND.
359 // We can't use RISBG for the shift in that case.
362 /* { dg-final { scan-assembler "f32:\n\tsll\t%r2,10\n\tnill\t%r2,58368" } } */
363 i32 v_and
= v_foo
& -7;
364 i32 v_shl
= v_and
<< 10;
368 // ...and again with i64.
371 /* { dg-final { scan-assembler "f33:\n\tsllg\t%r2,%r2,10\n\tnill\t%r2,58368" { target { lp64 } } } } */
372 /* { dg-final { scan-assembler "f33:\n\trisbg\t%r3,%r2,0,0\\\+32-1,64-0-32\n\tsllg\t%r2,%r3,10\n\tnill\t%r2,58368" { target { ! lp64 } } } } */
373 i64 v_and
= v_foo
& -7;
374 i64 v_shl
= v_and
<< 10;
378 // Test a case where the AND comes before a shift right.
381 /* { dg-final { scan-assembler "f34:\n\trisbg\t%r2,%r2,64-7,128\\\+63,48\\\+7" } } */
382 i32 v_and
= v_foo
& 65535;
383 i32 v_shl
= ((ui32
)v_and
) >> 9;
387 // ...and again with i64.
390 /* { dg-final { scan-assembler "f35:\n\trisbg\t%r2,%r2,64-7,128\\\+63,48\\\+7" { target { lp64 } } } } */
391 /* { dg-final { scan-assembler "f35:\n\trisbg\t%r3,%r3,64-7,128\\\+63,48\\\+7\n\tlhi\t%r2,0" { target { ! lp64 } } } } */
392 i64 v_and
= v_foo
& 65535;
393 i64 v_shl
= ((ui64
)v_and
) >> 9;
397 // Test a wrap-around case where the AND comes before a shift right.
398 // We can't use RISBG for the shift in that case.
401 /* { dg-final { scan-assembler "f36:\n\tsrl\t%r2,1\n\tlgfi\t%r1,2147483635\n\tngr\t%r2,%r1" { target { lp64 } } } } */
402 /* { dg-final { scan-assembler "f36:\n\tsrl\t%r2,1\n\tnilf\t%r2,2147483635" { target { ! lp64 } } } } */
403 i32 v_and
= v_foo
& -25;
404 i32 v_shl
= ((ui32
)v_and
) >> 1;
408 // ...and again with i64.
411 /* { dg-final { scan-assembler "f37:\n\(\t.*\n\)*\tsrlg\t%r2,%r2,1\n\tng\t%r2," { target { lp64 } } } } */
412 /* { dg-final { scan-assembler "f37:\n\(\t.*\n\)*\trisbg\t%r3,%r2,0,0\\\+32-1,64-0-32\n\tsrlg\t%r2,%r3,1\n\tng\t%r2," { target { ! lp64 } } } } */
413 i64 v_and
= v_foo
& -25;
414 i64 v_shl
= ((ui64
)v_and
) >> 1;
418 // Test a combination involving a large ASHR and a shift left. We can't
422 /* { dg-final { scan-assembler "f38:\n\tsrag\t%r2,%r2,32\n\tsllg\t%r2,%r2,5" { target { lp64 } } } } */
423 /* { dg-final { scan-assembler "f38:\n\trisbg\t%r3,%r2,0,0\\\+32-1,64-0-32\n\tsrag\t%r2,%r3,32\n\tsllg\t%r2,%r2,5" { target { ! lp64 } } } } */
424 i64 v_ashr
= v_foo
>> 32;
425 i64 v_shl
= v_ashr
<< 5;
/* NOTE(review): f39/f40 show their parameter lists but their opening braces,
   the store through *v_dest (the skipped original lines 435/447) and the
   returns are missing from this excerpt, as are the headers for f41-f44;
   fragments kept verbatim (asm-scan patterns depend on them).  */
429 // Try a similar thing in which no shifted sign bits are kept.
430 i64
f39 (i64 v_foo
, i64
*v_dest
)
432 /* { dg-final { scan-assembler "f39:\n\tsrag\t%r2,%r2,35\n\(\t.*\n\)*\trisbg\t%r2,%r2,33,128\\\+61,2" { target { lp64 } } } } */
433 /* { dg-final { scan-assembler "f39:\n\trisbg\t%r3,%r2,0,0\\\+32-1,64-0-32\n\tlhi\t%r2,0\n\tsrag\t%r3,%r3,35\n\(\t.*\n\)*\trisbg\t%r3,%r3,33,128\\\+61,2" { target { ! lp64 } } } } */
434 i64 v_ashr
= v_foo
>> 35;
436 i64 v_shl
= v_ashr
<< 2;
437 i64 v_and
= v_shl
& 2147483647;
441 // ...and again with the next highest shift value, where one sign bit is kept.
442 i64
f40 (i64 v_foo
, i64
*v_dest
)
444 /* { dg-final { scan-assembler "f40:\n\tsrag\t%r2,%r2,36\n\(\t.*\n\)*\trisbg\t%r2,%r2,33,128\\\+61,2" { target { lp64 } } } } */
445 /* { dg-final { scan-assembler "f40:\n\trisbg\t%r3,%r2,0,0\\\+32-1,64-0-32\n\tlhi\t%r2,0\n\tsrag\t%r3,%r3,36\n\(\t.*\n\)*\trisbg\t%r3,%r3,33,128\\\+61,2" { target { ! lp64 } } } } */
446 i64 v_ashr
= v_foo
>> 36;
448 i64 v_shl
= v_ashr
<< 2;
449 i64 v_and
= v_shl
& 2147483647;
453 // Check a case where the result is zero-extended.
456 /* { dg-final { scan-assembler "f41:\n\trisbg\t%r2,%r2,64-28,128\\\+63,34\\\+28" { target { lp64 } } } } */
457 /* { dg-final { scan-assembler "f41:\n\trisbg\t%r3,%r2,64-28,128\\\+63,34\\\+28\n\tlhi\t%r2,0" { target { ! lp64 } } } } */
458 i32 v_shl
= v_a
<< 2;
459 i32 v_shr
= ((ui32
)v_shl
) >> 4;
460 i64 v_ext
= (ui64
)v_shr
;
464 // In this case the sign extension is converted to a pair of 32-bit shifts,
465 // which is then extended to 64 bits. We previously used the wrong bit size
466 // when testing whether the shifted-in bits of the shift right were significant.
467 typedef struct { ui64 pad
: 63; ui8 a
: 1; } t42
;
470 /* { dg-final { scan-assembler "f42:\n\tsllg\t%r2,%r2,63\n\tsrag\t%r2,%r2,63\n\tllgcr\t%r2,%r2" { target { lp64 } } } } */
471 /* { dg-final { scan-assembler "f42:\n\tsllg\t%r3,%r3,63\n\tlhi\t%r2,0\n\tsrag\t%r3,%r3,63\n\tllcr\t%r3,%r3" { target { ! lp64 } } } } */
473 i8 ext
= ((i8
)a
) >> 7;
474 i64 ext2
= (ui64
)(ui8
)ext
;
478 // Check that we get the case where a 64-bit shift is used by a 32-bit and.
481 /* { dg-final { scan-assembler "f43:\n\trisbg\t%r2,%r2,32,128\\\+61,64-12" { target { lp64 } } } } */
482 /* { dg-final { scan-assembler "f43:\n\trisbg\t%r3,%r2,0,0\\\+32-1,64-0-32\n\trisbg\t%r2,%r3,32,128\\\+61,64-12" { target { ! lp64 } } } } */
483 i64 v_shr3
= ((ui64
)v_x
) >> 12;
484 i32 v_shr3_tr
= (ui32
)v_shr3
;
485 i32 v_conv
= v_shr3_tr
& -4;
489 // Check that we don't get the case where the 32-bit and mask is not contiguous
492 /* { dg-final { scan-assembler "f44:\n\tsrlg\t%r2,%r2,12" { target { lp64 } } } } */
493 /* { dg-final { scan-assembler "f44:\n\tsrlg\t%r2,%r3,12\n\tnilf\t%r2,10" { target { ! lp64 } } } } */
494 i64 v_shr4
= ((ui64
)v_x
) >> 12;
495 i32 v_conv
= (ui32
)v_shr4
;
496 i32 v_and
= v_conv
& 10;