1 ; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s
2 ; Obtained from C source as:
3 ; clang -S -emit-llvm -O0 vect-strided-u32-i8.c
4 ; opt -correlated-propagation -mem2reg -instcombine -loop-simplify -indvars -instnamer vect-strided-u32-i8.s > vect-strided-u32-i8.ll
6 ; ModuleID = 'vect-strided-u32-i8.s'
; Module-level target description: little-endian layout ("e") with 64-bit
; pointers ("p:64:64:64"), for the x86_64 Linux triple.
7 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
8 target triple = "x86_64-unknown-linux-gnu"
; Record of eight i32 fields (32 bytes). The functions below address the
; fields by index 0..7 and name the resulting pointers a, b, c, d, e, f, g, h.
10 %struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32 }
; main1(%arr): two loops over indices 0..127 of %arr.
;   Loop 1 (for.cond/for.body/for.inc) computes eight i32 values from the
;   fields of %arr[i] and stores them into the fields of the local %res[i].
;   Loop 2 (for.cond121 ... for.inc353) re-evaluates the same eight
;   expressions, compares each with the value stored in %res[i], and branches
;   to if.then (which calls abort) on the first mismatch.
; Field indices used below: 0 (reached through a bitcast of the element
; pointer) = a, 1 = b, 2 = c, 3 = d, 4 = e, 5 = f, 6 = g, 7 = h.
; NOTE(review): the embedded line numbers skip values (e.g. 13, 15, 93, 97,
; 212, 215, 222-223), so the entry label, several terminators and the closing
; brace are not visible in this view.
12 define i32 @main1(%struct.s* %arr) nounwind uwtable noinline {
; Local result array of 128 records.
14 %res = alloca [128 x %struct.s], align 16
; ---- Loop 1 header: %indvar16 is the i64 induction variable, starting at 0.
17 for.cond: ; preds = %for.inc, %entry
18 %indvar16 = phi i64 [ %indvar.next17, %for.inc ], [ 0, %entry ]
; Addresses of the destination fields in %res[%indvar16].
19 %g118 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar16, i32 6
20 %h99 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar16, i32 7
21 %e88 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar16, i32 4
22 %f78 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar16, i32 5
23 %b68 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar16, i32 1
24 %d50 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar16, i32 3
25 %scevgep24 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar16
26 %a31 = bitcast %struct.s* %scevgep24 to i32*
27 %c13 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar16, i32 2
; Addresses of the source fields in %arr[%indvar16].
28 %c = getelementptr %struct.s* %arr, i64 %indvar16, i32 2
29 %d = getelementptr %struct.s* %arr, i64 %indvar16, i32 3
30 %scevgep29 = getelementptr %struct.s* %arr, i64 %indvar16
31 %a = bitcast %struct.s* %scevgep29 to i32*
32 %b = getelementptr %struct.s* %arr, i64 %indvar16, i32 1
33 %g = getelementptr %struct.s* %arr, i64 %indvar16, i32 6
34 %e = getelementptr %struct.s* %arr, i64 %indvar16, i32 4
35 %h = getelementptr %struct.s* %arr, i64 %indvar16, i32 7
36 %f = getelementptr %struct.s* %arr, i64 %indvar16, i32 5
; Stay in the loop while the index differs from 128 (64-bit compare).
37 %exitcond = icmp ne i64 %indvar16, 128
38 br i1 %exitcond, label %for.body, label %for.end
40 for.body: ; preds = %for.cond
; res[i].c = b - a + d - c
41 %tmp4 = load i32* %b, align 4
42 %tmp6 = load i32* %a, align 4
43 %sub = sub nsw i32 %tmp4, %tmp6
44 %tmp8 = load i32* %d, align 4
45 %add = add nsw i32 %sub, %tmp8
46 %tmp10 = load i32* %c, align 4
47 %sub11 = sub nsw i32 %add, %tmp10
48 store i32 %sub11, i32* %c13, align 8
; res[i].a = a + g + b + d
49 %tmp16 = load i32* %a, align 4
50 %tmp18 = load i32* %g, align 4
51 %add19 = add nsw i32 %tmp16, %tmp18
52 %tmp22 = load i32* %b, align 4
53 %add23 = add nsw i32 %add19, %tmp22
54 %tmp26 = load i32* %d, align 4
55 %add27 = add nsw i32 %add23, %tmp26
56 store i32 %add27, i32* %a31, align 16
; res[i].d = b - a + d - c
57 %tmp34 = load i32* %b, align 4
58 %tmp37 = load i32* %a, align 4
59 %sub38 = sub nsw i32 %tmp34, %tmp37
60 %tmp41 = load i32* %d, align 4
61 %add42 = add nsw i32 %sub38, %tmp41
62 %tmp45 = load i32* %c, align 4
63 %sub46 = sub nsw i32 %add42, %tmp45
64 store i32 %sub46, i32* %d50, align 4
; res[i].b = h - a + d - c
65 %tmp52 = load i32* %h, align 4
66 %tmp55 = load i32* %a, align 4
67 %sub56 = sub nsw i32 %tmp52, %tmp55
68 %tmp59 = load i32* %d, align 4
69 %add60 = add nsw i32 %sub56, %tmp59
70 %tmp63 = load i32* %c, align 4
71 %sub64 = sub nsw i32 %add60, %tmp63
72 store i32 %sub64, i32* %b68, align 4
; res[i].f = f + h
73 %tmp70 = load i32* %f, align 4
74 %tmp73 = load i32* %h, align 4
75 %add74 = add nsw i32 %tmp70, %tmp73
76 store i32 %add74, i32* %f78, align 4
; res[i].e = b - e
77 %tmp81 = load i32* %b, align 4
78 %tmp83 = load i32* %e, align 4
79 %sub84 = sub nsw i32 %tmp81, %tmp83
80 store i32 %sub84, i32* %e88, align 16
; res[i].h = d - g
81 %tmp91 = load i32* %d, align 4
82 %tmp94 = load i32* %g, align 4
83 %sub95 = sub nsw i32 %tmp91, %tmp94
84 store i32 %sub95, i32* %h99, align 4
; res[i].g = b - a + d - c
85 %tmp102 = load i32* %b, align 4
86 %tmp105 = load i32* %a, align 4
87 %sub106 = sub nsw i32 %tmp102, %tmp105
88 %tmp109 = load i32* %d, align 4
89 %add110 = add nsw i32 %sub106, %tmp109
90 %tmp113 = load i32* %c, align 4
91 %sub114 = sub nsw i32 %add110, %tmp113
92 store i32 %sub114, i32* %g118, align 8
95 for.inc: ; preds = %for.body
96 %indvar.next17 = add i64 %indvar16, 1
99 for.end: ; preds = %for.cond
100 br label %for.cond121
; ---- Loop 2 header: verification pass with its own induction variable.
102 for.cond121: ; preds = %for.inc353, %for.end
103 %indvar = phi i64 [ %indvar.next, %for.inc353 ], [ 0, %for.end ]
; Source-field addresses (%arr) and stored-result addresses (%res) for
; element %indvar.
104 %c154 = getelementptr %struct.s* %arr, i64 %indvar, i32 2
105 %d147 = getelementptr %struct.s* %arr, i64 %indvar, i32 3
106 %scevgep = getelementptr %struct.s* %arr, i64 %indvar
107 %a140 = bitcast %struct.s* %scevgep to i32*
108 %b134 = getelementptr %struct.s* %arr, i64 %indvar, i32 1
109 %g323 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar, i32 6
110 %g173 = getelementptr %struct.s* %arr, i64 %indvar, i32 6
111 %h303 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar, i32 7
112 %e295 = getelementptr %struct.s* %arr, i64 %indvar, i32 4
113 %e283 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar, i32 4
114 %h235 = getelementptr %struct.s* %arr, i64 %indvar, i32 7
115 %f269 = getelementptr %struct.s* %arr, i64 %indvar, i32 5
116 %f263 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar, i32 5
117 %b229 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar, i32 1
118 %d195 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar, i32 3
119 %scevgep13 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar
120 %a161 = bitcast %struct.s* %scevgep13 to i32*
121 %c128 = getelementptr [128 x %struct.s]* %res, i64 0, i64 %indvar, i32 2
; Unlike loop 1, the bound test is a signed 32-bit compare on the truncated
; induction variable.
122 %i.1 = trunc i64 %indvar to i32
123 %cmp123 = icmp slt i32 %i.1, 128
124 br i1 %cmp123, label %for.body124, label %for.end356
126 for.body124: ; preds = %for.cond121
; Check res[i].c == b - a + d - c
127 %tmp129 = load i32* %c128, align 8
128 %tmp135 = load i32* %b134, align 4
129 %tmp141 = load i32* %a140, align 4
130 %sub142 = sub nsw i32 %tmp135, %tmp141
131 %tmp148 = load i32* %d147, align 4
132 %add149 = add nsw i32 %sub142, %tmp148
133 %tmp155 = load i32* %c154, align 4
134 %sub156 = sub nsw i32 %add149, %tmp155
135 %cmp157 = icmp eq i32 %tmp129, %sub156
136 br i1 %cmp157, label %lor.lhs.false, label %if.then
138 lor.lhs.false: ; preds = %for.body124
; Check res[i].a == a + g + b + d
139 %tmp162 = load i32* %a161, align 16
140 %tmp168 = load i32* %a140, align 4
141 %tmp174 = load i32* %g173, align 4
142 %add175 = add nsw i32 %tmp168, %tmp174
143 %tmp181 = load i32* %b134, align 4
144 %add182 = add nsw i32 %add175, %tmp181
145 %tmp188 = load i32* %d147, align 4
146 %add189 = add nsw i32 %add182, %tmp188
147 %cmp190 = icmp eq i32 %tmp162, %add189
148 br i1 %cmp190, label %lor.lhs.false191, label %if.then
150 lor.lhs.false191: ; preds = %lor.lhs.false
; Check res[i].d == b - a + d - c
151 %tmp196 = load i32* %d195, align 4
152 %tmp202 = load i32* %b134, align 4
153 %tmp208 = load i32* %a140, align 4
154 %sub209 = sub nsw i32 %tmp202, %tmp208
155 %tmp215 = load i32* %d147, align 4
156 %add216 = add nsw i32 %sub209, %tmp215
157 %tmp222 = load i32* %c154, align 4
158 %sub223 = sub nsw i32 %add216, %tmp222
159 %cmp224 = icmp eq i32 %tmp196, %sub223
160 br i1 %cmp224, label %lor.lhs.false225, label %if.then
162 lor.lhs.false225: ; preds = %lor.lhs.false191
; Check res[i].b == h - a + d - c
163 %tmp230 = load i32* %b229, align 4
164 %tmp236 = load i32* %h235, align 4
165 %tmp242 = load i32* %a140, align 4
166 %sub243 = sub nsw i32 %tmp236, %tmp242
167 %tmp249 = load i32* %d147, align 4
168 %add250 = add nsw i32 %sub243, %tmp249
169 %tmp256 = load i32* %c154, align 4
170 %sub257 = sub nsw i32 %add250, %tmp256
171 %cmp258 = icmp eq i32 %tmp230, %sub257
172 br i1 %cmp258, label %lor.lhs.false259, label %if.then
174 lor.lhs.false259: ; preds = %lor.lhs.false225
; Check res[i].f == f + h
175 %tmp264 = load i32* %f263, align 4
176 %tmp270 = load i32* %f269, align 4
177 %tmp276 = load i32* %h235, align 4
178 %add277 = add nsw i32 %tmp270, %tmp276
179 %cmp278 = icmp eq i32 %tmp264, %add277
180 br i1 %cmp278, label %lor.lhs.false279, label %if.then
182 lor.lhs.false279: ; preds = %lor.lhs.false259
; Check res[i].e == b - e
183 %tmp284 = load i32* %e283, align 16
184 %tmp290 = load i32* %b134, align 4
185 %tmp296 = load i32* %e295, align 4
186 %sub297 = sub nsw i32 %tmp290, %tmp296
187 %cmp298 = icmp eq i32 %tmp284, %sub297
188 br i1 %cmp298, label %lor.lhs.false299, label %if.then
190 lor.lhs.false299: ; preds = %lor.lhs.false279
; Check res[i].h == d - g
191 %tmp304 = load i32* %h303, align 4
192 %tmp310 = load i32* %d147, align 4
193 %tmp316 = load i32* %g173, align 4
194 %sub317 = sub nsw i32 %tmp310, %tmp316
195 %cmp318 = icmp eq i32 %tmp304, %sub317
196 br i1 %cmp318, label %lor.lhs.false319, label %if.then
198 lor.lhs.false319: ; preds = %lor.lhs.false299
; Check res[i].g == b - a + d - c
199 %tmp324 = load i32* %g323, align 8
200 %tmp330 = load i32* %b134, align 4
201 %tmp336 = load i32* %a140, align 4
202 %sub337 = sub nsw i32 %tmp330, %tmp336
203 %tmp343 = load i32* %d147, align 4
204 %add344 = add nsw i32 %sub337, %tmp343
205 %tmp350 = load i32* %c154, align 4
206 %sub351 = sub nsw i32 %add344, %tmp350
207 %cmp352 = icmp eq i32 %tmp324, %sub351
208 br i1 %cmp352, label %if.end, label %if.then
; Shared failure block: reached from any of the eight comparisons above.
210 if.then: ; preds = %lor.lhs.false319, %lor.lhs.false299, %lor.lhs.false279, %lor.lhs.false259, %lor.lhs.false225, %lor.lhs.false191, %lor.lhs.false, %for.body124
211 call void @abort() noreturn nounwind
214 if.end: ; preds = %lor.lhs.false319
217 for.inc353: ; preds = %if.end
218 %indvar.next = add i64 %indvar, 1
219 br label %for.cond121
; Exit of the verification loop; the return instruction is not visible here.
221 for.end356: ; preds = %for.cond121
; External declaration; marked noreturn. Called from the failure paths of
; @main1 and @main.
225 declare void @abort() noreturn
; main: calls @check_vect, fills a local array of 128 %struct.s records, then
; passes a pointer to its first element to @main1.
; Values stored for index i (all truncated from the i64 induction variable):
;   a = i, b = 2*i, c = 17, d = i + 34, e = 3*i + 5, f = 5*i, g = i - 3, h = 56
; NOTE(review): the embedded line numbers skip values here, so the entry
; label, some terminators, the return and the closing brace are not visible.
227 define i32 @main() nounwind uwtable {
229 %arr = alloca [128 x %struct.s], align 16
230 call void @check_vect()
233 for.cond: ; preds = %for.inc, %entry
234 %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
; %sub = i - 3 (stored to field g)
235 %tmp = add i64 %indvar, -3
236 %sub = trunc i64 %tmp to i32
; %mul = 2*i (stored to field b)
237 %tmp2 = mul i64 %indvar, 2
238 %mul = trunc i64 %tmp2 to i32
239 %scevgep = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar
240 %a = bitcast %struct.s* %scevgep to i32*
241 %h = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar, i32 7
242 %g = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar, i32 6
243 %f = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar, i32 5
; %mul21 = 5*i (stored to field f)
244 %tmp8 = mul i64 %indvar, 5
245 %mul21 = trunc i64 %tmp8 to i32
246 %e = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar, i32 4
; %add16 = 3*i + 5 (stored to field e)
247 %tmp11 = mul i64 %indvar, 3
248 %tmp12 = add i64 %tmp11, 5
249 %add16 = trunc i64 %tmp12 to i32
250 %d = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar, i32 3
; %add = i + 34 (stored to field d)
251 %tmp15 = add i64 %indvar, 34
252 %add = trunc i64 %tmp15 to i32
253 %c = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar, i32 2
254 %b = getelementptr [128 x %struct.s]* %arr, i64 0, i64 %indvar, i32 1
; Loop while the truncated index is < 128 (signed 32-bit compare).
255 %i.0 = trunc i64 %indvar to i32
256 %cmp = icmp slt i32 %i.0, 128
257 br i1 %cmp, label %for.body, label %for.end
259 for.body: ; preds = %for.cond
260 store i32 %i.0, i32* %a, align 16
261 store i32 %mul, i32* %b, align 4
262 store i32 17, i32* %c, align 8
263 store i32 %add, i32* %d, align 4
264 store i32 %add16, i32* %e, align 16
265 store i32 %mul21, i32* %f, align 4
266 store i32 %sub, i32* %g, align 8
267 store i32 56, i32* %h, align 4
; Reload field a and abort if it equals 178. The value just stored is
; %i.0, which is below 128 inside this loop.
; NOTE(review): presumably a guard that keeps this init loop from being
; optimized/vectorized in the original C test -- confirm against the source.
268 %tmp36 = load i32* %a, align 16
269 %cmp37 = icmp eq i32 %tmp36, 178
270 br i1 %cmp37, label %if.then, label %if.end
272 if.then: ; preds = %for.body
273 call void @abort() noreturn nounwind
276 if.end: ; preds = %for.body
279 for.inc: ; preds = %if.end
280 %indvar.next = add i64 %indvar, 1
; After initialization: hand the array (decayed to a %struct.s*) to @main1.
283 for.end: ; preds = %for.cond
284 %arraydecay = getelementptr inbounds [128 x %struct.s]* %arr, i64 0, i64 0
285 %call = call i32 @main1(%struct.s* %arraydecay)
; check_vect: run-time CPU feature gate.
;   1. Installs @sig_ill_handler for signal number 4 (SIGILL on Linux).
;   2. Calls @__get_cpuid(1, &a, &b, &c, &d).
;   3. Calls exit(0) if that call returned 0, or if bit 26 (67108864 = 1 << 26)
;      of the value written through %d is clear.
;   4. Otherwise calls signal(4, null) to replace the handler.
; NOTE(review): %d receives the EDX output of CPUID leaf 1; bit 26 there is
; documented as the SSE2 feature flag -- confirm against the CPUID reference.
; NOTE(review): the entry label, some terminators and the closing brace are
; not visible in this view (embedded line numbers skip values).
289 define internal void @check_vect() nounwind uwtable noinline {
; Output slots for the four CPUID result registers.
291 %a = alloca i32, align 4
292 %b = alloca i32, align 4
293 %c = alloca i32, align 4
294 %d = alloca i32, align 4
295 %call = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* @sig_ill_handler) nounwind
296 %call1 = call i32 @__get_cpuid(i32 1, i32* %a, i32* %b, i32* %c, i32* %d)
; A zero return from @__get_cpuid means the requested level was rejected.
297 %tobool = icmp eq i32 %call1, 0
298 br i1 %tobool, label %if.then, label %lor.lhs.false
300 lor.lhs.false: ; preds = %entry
; Test bit 26 of the value stored through %d.
301 %tmp4 = load i32* %d, align 4
302 %and6 = and i32 %tmp4, 67108864
303 %cmp = icmp eq i32 %and6, 0
304 br i1 %cmp, label %if.then, label %if.end
306 if.then: ; preds = %entry, %lor.lhs.false
307 call void @exit(i32 0) noreturn nounwind
310 if.end: ; preds = %lor.lhs.false
; Feature present: set the signal-4 handler to null
; (presumably SIG_DFL in the C source -- TODO confirm).
311 %call7 = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* null) nounwind
; External declaration: takes a signal number and a handler pointer, returns
; a handler pointer. Called twice from @check_vect.
315 declare void (i32)* @signal(i32, void (i32)*) nounwind
; sig_ill_handler(%sig): handler registered by @check_vect for signal 4.
; It ignores %sig and calls exit(0).
; NOTE(review): the entry label, the terminators and the closing brace are not
; visible in this view.
317 define internal void @sig_ill_handler(i32 %sig) nounwind uwtable {
319 call void @exit(i32 0) noreturn nounwind
; Unreachable block (no predecessors), left over from code generation.
322 return: ; No predecessors!
; __get_cpuid(%__level, %__eax, %__ebx, %__ecx, %__edx):
;   Asks @__get_cpuid_max for the highest supported level in the same range as
;   %__level (selected by the top bit, mask 0x80000000). If that maximum is
;   below %__level (unsigned compare), control goes through if.then and the
;   result value is 0. Otherwise the function executes `cpuid` with %__level
;   as input, stores the four outputs through the pointer arguments, and the
;   result value is 1.
; NOTE(review): the entry label, the branches into `return`, the ret and the
; closing brace are not visible in this view.
326 define internal i32 @__get_cpuid(i32 %__level, i32* %__eax, i32* %__ebx, i32* %__ecx, i32* %__edx) nounwind uwtable inlinehint {
; Keep only the top bit of the level (-2147483648 == 0x80000000 as i32).
328 %and = and i32 %__level, -2147483648
329 %call = call i32 @__get_cpuid_max(i32 %and, i32* null)
330 %cmp = icmp ult i32 %call, %__level
331 br i1 %cmp, label %if.then, label %if.end
333 if.then: ; preds = %entry
336 if.end: ; preds = %entry
; Inline asm: four outputs in ax/bx/cx/dx; the input is tied to output 0
; (constraint "0"), so %__level is passed in the ax register.
337 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__level) nounwind, !srcloc !0
338 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
339 %asmresult8 = extractvalue { i32, i32, i32, i32 } %tmp, 1
340 %asmresult9 = extractvalue { i32, i32, i32, i32 } %tmp, 2
341 %asmresult10 = extractvalue { i32, i32, i32, i32 } %tmp, 3
342 store i32 %asmresult, i32* %__eax, align 4
343 store i32 %asmresult8, i32* %__ebx, align 4
344 store i32 %asmresult9, i32* %__ecx, align 4
345 store i32 %asmresult10, i32* %__edx, align 4
348 return: ; preds = %if.end, %if.then
; 0 when the level was rejected, 1 after a successful cpuid query.
349 %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.end ]
; External declaration; marked noreturn. Called with status 0 from
; @check_vect and @sig_ill_handler.
353 declare void @exit(i32) noreturn
; __get_cpuid_max(%__ext, %__sig): executes `cpuid` with %__ext as the input
; level. When %__sig is non-null, the second output (bx) is stored through it.
; NOTE(review): the return instruction is not visible in this view; the result
; is presumably %asmresult (the ax output) -- confirm against the full file.
355 define internal i32 @__get_cpuid_max(i32 %__ext, i32* %__sig) nounwind uwtable inlinehint {
; Same asm/constraint string as in @__get_cpuid; the input is tied to output 0.
357 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__ext) nounwind, !srcloc !1
358 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
; Skip the optional store when no output pointer was supplied.
359 %tobool = icmp eq i32* %__sig, null
360 br i1 %tobool, label %if.end, label %if.then
362 if.then: ; preds = %entry
363 %asmresult1 = extractvalue { i32, i32, i32, i32 } %tmp, 1
364 store i32 %asmresult1, i32* %__sig, align 4
367 if.end: ; preds = %entry, %if.then
; Metadata nodes referenced by the `!srcloc` attachments on the two inline-asm
; calls: !0 in @__get_cpuid, !1 in @__get_cpuid_max.
371 !0 = metadata !{i32 -2147342537, i32 -2147342529}
372 !1 = metadata !{i32 -2147342660, i32 -2147342652}